##// END OF EJS Templates
revlog: use a `reading` context in `_enforceinlinesize`...
marmoute -
r51918:9011c38b default
parent child Browse files
Show More
@@ -1,3546 +1,3546 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""
14
14
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import weakref
22 import weakref
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .revlogutils.constants import (
35 from .revlogutils.constants import (
36 ALL_KINDS,
36 ALL_KINDS,
37 CHANGELOGV2,
37 CHANGELOGV2,
38 COMP_MODE_DEFAULT,
38 COMP_MODE_DEFAULT,
39 COMP_MODE_INLINE,
39 COMP_MODE_INLINE,
40 COMP_MODE_PLAIN,
40 COMP_MODE_PLAIN,
41 DELTA_BASE_REUSE_NO,
41 DELTA_BASE_REUSE_NO,
42 DELTA_BASE_REUSE_TRY,
42 DELTA_BASE_REUSE_TRY,
43 ENTRY_RANK,
43 ENTRY_RANK,
44 FEATURES_BY_VERSION,
44 FEATURES_BY_VERSION,
45 FLAG_GENERALDELTA,
45 FLAG_GENERALDELTA,
46 FLAG_INLINE_DATA,
46 FLAG_INLINE_DATA,
47 INDEX_HEADER,
47 INDEX_HEADER,
48 KIND_CHANGELOG,
48 KIND_CHANGELOG,
49 KIND_FILELOG,
49 KIND_FILELOG,
50 RANK_UNKNOWN,
50 RANK_UNKNOWN,
51 REVLOGV0,
51 REVLOGV0,
52 REVLOGV1,
52 REVLOGV1,
53 REVLOGV1_FLAGS,
53 REVLOGV1_FLAGS,
54 REVLOGV2,
54 REVLOGV2,
55 REVLOGV2_FLAGS,
55 REVLOGV2_FLAGS,
56 REVLOG_DEFAULT_FLAGS,
56 REVLOG_DEFAULT_FLAGS,
57 REVLOG_DEFAULT_FORMAT,
57 REVLOG_DEFAULT_FORMAT,
58 REVLOG_DEFAULT_VERSION,
58 REVLOG_DEFAULT_VERSION,
59 SUPPORTED_FLAGS,
59 SUPPORTED_FLAGS,
60 )
60 )
61 from .revlogutils.flagutil import (
61 from .revlogutils.flagutil import (
62 REVIDX_DEFAULT_FLAGS,
62 REVIDX_DEFAULT_FLAGS,
63 REVIDX_ELLIPSIS,
63 REVIDX_ELLIPSIS,
64 REVIDX_EXTSTORED,
64 REVIDX_EXTSTORED,
65 REVIDX_FLAGS_ORDER,
65 REVIDX_FLAGS_ORDER,
66 REVIDX_HASCOPIESINFO,
66 REVIDX_HASCOPIESINFO,
67 REVIDX_ISCENSORED,
67 REVIDX_ISCENSORED,
68 REVIDX_RAWTEXT_CHANGING_FLAGS,
68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 )
69 )
70 from .thirdparty import attr
70 from .thirdparty import attr
71 from . import (
71 from . import (
72 ancestor,
72 ancestor,
73 dagop,
73 dagop,
74 error,
74 error,
75 mdiff,
75 mdiff,
76 policy,
76 policy,
77 pycompat,
77 pycompat,
78 revlogutils,
78 revlogutils,
79 templatefilters,
79 templatefilters,
80 util,
80 util,
81 )
81 )
82 from .interfaces import (
82 from .interfaces import (
83 repository,
83 repository,
84 util as interfaceutil,
84 util as interfaceutil,
85 )
85 )
86 from .revlogutils import (
86 from .revlogutils import (
87 deltas as deltautil,
87 deltas as deltautil,
88 docket as docketutil,
88 docket as docketutil,
89 flagutil,
89 flagutil,
90 nodemap as nodemaputil,
90 nodemap as nodemaputil,
91 randomaccessfile,
91 randomaccessfile,
92 revlogv0,
92 revlogv0,
93 rewrite,
93 rewrite,
94 sidedata as sidedatautil,
94 sidedata as sidedatautil,
95 )
95 )
96 from .utils import (
96 from .utils import (
97 storageutil,
97 storageutil,
98 stringutil,
98 stringutil,
99 )
99 )
100
100
# Blank usage of all these names to keep pyflakes from flagging the imports
# as unused. These names must stay available in this module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
122
122
# Implementation modules selected through the `policy` module.
# NOTE(review): `rustrevlog` is compared against None further down, so the
# rust imports presumably yield None when unavailable -- confirm in `policy`.
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072
133
133
# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read processor: return ``text`` untouched, paired with ``False``.

    ``rl`` is accepted for signature compatibility and is not used.
    """
    return text, False


def ellipsiswriteprocessor(rl, text):
    """Write processor: return ``text`` untouched, paired with ``False``.

    ``rl`` is accepted for signature compatibility and is not used.
    """
    return text, False


def ellipsisrawprocessor(rl, text):
    """Raw processor: always return ``False``; both arguments are unused."""
    return False


# (read, write, raw) processor triple; the revlog registers it under
# REVIDX_ELLIPSIS when the `enableellipsis` option is set.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
152
152
153
153
154 def _verify_revision(rl, skipflags, state, node):
154 def _verify_revision(rl, skipflags, state, node):
155 """Verify the integrity of the given revlog ``node`` while providing a hook
155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 point for extensions to influence the operation."""
156 point for extensions to influence the operation."""
157 if skipflags:
157 if skipflags:
158 state[b'skipread'].add(node)
158 state[b'skipread'].add(node)
159 else:
159 else:
160 # Side-effect: read content and verify hash.
160 # Side-effect: read content and verify hash.
161 rl.revision(node)
161 rl.revision(node)
162
162
163
163
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)
172
172
173
173
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    """Slotted attrs record declared as implementing ``irevisiondelta``.

    Every field is required except ``linknode``, which defaults to ``None``.
    """

    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)
188
188
189
189
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    """Frozen attrs record declared as implementing ``iverifyproblem``.

    All three fields are optional and default to ``None``.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
196
196
197
197
def parse_index_v1(data, inline):
    """Parse index ``data`` with ``parsers.parse_index2``.

    Returns the ``(index, cache)`` pair produced by the parser.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache
202
202
203
203
def parse_index_v2(data, inline):
    """Parse index ``data`` with ``parsers.parse_index2`` in REVLOGV2 format.

    Returns the ``(index, cache)`` pair produced by the parser.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache
208
208
209
209
def parse_index_cl_v2(data, inline):
    """Parse index ``data`` with ``parsers.parse_index2`` in CHANGELOGV2 format.

    Returns the ``(index, cache)`` pair produced by the parser.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache
214
214
215
215
# `parse_index_v1_nodemap` only exists when the loaded `parsers` module
# provides `parse_index_devel_nodemap`; otherwise the name is bound to None.
if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        """Parse index ``data`` with ``parsers.parse_index_devel_nodemap``.

        Returns the ``(index, cache)`` pair produced by the parser.
        """
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None
225
225
226
226
def parse_index_v1_mixed(data, inline):
    """Parse with ``parse_index_v1`` and wrap the index in a rust MixedIndex.

    Returns ``(rustrevlog.MixedIndex(index), cache)``.
    """
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache
230
230
231
231
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

# Translated bytes template taking, in order: one %s value followed by three
# %d values (expected byte count, offset, data size).
FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

# Every byte accepted as a hexadecimal digit, in either case.
hexdigits = b'0123456789abcdefABCDEF'
242
242
243
243
244 class revlog:
244 class revlog:
245 """
245 """
246 the underlying revision storage object
246 the underlying revision storage object
247
247
248 A revlog consists of two parts, an index and the revision data.
248 A revlog consists of two parts, an index and the revision data.
249
249
250 The index is a file with a fixed record size containing
250 The index is a file with a fixed record size containing
251 information on each revision, including its nodeid (hash), the
251 information on each revision, including its nodeid (hash), the
252 nodeids of its parents, the position and offset of its data within
252 nodeids of its parents, the position and offset of its data within
253 the data file, and the revision it's based on. Finally, each entry
253 the data file, and the revision it's based on. Finally, each entry
254 contains a linkrev entry that can serve as a pointer to external
254 contains a linkrev entry that can serve as a pointer to external
255 data.
255 data.
256
256
257 The revision data itself is a linear collection of data chunks.
257 The revision data itself is a linear collection of data chunks.
258 Each chunk represents a revision and is usually represented as a
258 Each chunk represents a revision and is usually represented as a
259 delta against the previous chunk. To bound lookup time, runs of
259 delta against the previous chunk. To bound lookup time, runs of
260 deltas are limited to about 2 times the length of the original
260 deltas are limited to about 2 times the length of the original
261 version data. This makes retrieval of a version proportional to
261 version data. This makes retrieval of a version proportional to
262 its size, or O(1) relative to the number of revisions.
262 its size, or O(1) relative to the number of revisions.
263
263
264 Both pieces of the revlog are written to in an append-only
264 Both pieces of the revlog are written to in an append-only
265 fashion, which means we never need to rewrite a file to insert or
265 fashion, which means we never need to rewrite a file to insert or
266 remove data, and can use some simple techniques to avoid the need
266 remove data, and can use some simple techniques to avoid the need
267 for locking while reading.
267 for locking while reading.
268
268
269 If checkambig, indexfile is opened with checkambig=True at
269 If checkambig, indexfile is opened with checkambig=True at
270 writing, to avoid file stat ambiguity.
270 writing, to avoid file stat ambiguity.
271
271
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 index will be mmapped rather than read if it is larger than the
273 index will be mmapped rather than read if it is larger than the
274 configured threshold.
274 configured threshold.
275
275
276 If censorable is True, the revlog can have censored revisions.
276 If censorable is True, the revlog can have censored revisions.
277
277
278 If `upperboundcomp` is not None, this is the expected maximal gain from
278 If `upperboundcomp` is not None, this is the expected maximal gain from
279 compression for the data content.
279 compression for the data content.
280
280
281 `concurrencychecker` is an optional function that receives 3 arguments: a
281 `concurrencychecker` is an optional function that receives 3 arguments: a
282 file handle, a filename, and an expected position. It should check whether
282 file handle, a filename, and an expected position. It should check whether
283 the current position in the file handle is valid, and log/warn/fail (by
283 the current position in the file handle is valid, and log/warn/fail (by
284 raising).
284 raising).
285
285
    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
288 """
288 """
289
289
290 _flagserrorclass = error.RevlogError
290 _flagserrorclass = error.RevlogError
291
291
@staticmethod
def is_inline_index(header_bytes):
    """Determine if a revlog is inline from the initial bytes of the index

    ``header_bytes`` is unpacked with ``INDEX_HEADER``. The low 16 bits of
    the first unpacked value are the format version and the remaining bits
    are the format flags. The answer comes from the ``b'inline'`` callable
    registered for that version in ``FEATURES_BY_VERSION``.
    """
    header = INDEX_HEADER.unpack(header_bytes)[0]

    _format_flags = header & ~0xFFFF
    _format_version = header & 0xFFFF

    features = FEATURES_BY_VERSION[_format_version]
    return features[b'inline'](_format_flags)
302
302
def __init__(
    self,
    opener,
    target,
    radix,
    postfix=None,  # only exist for `tmpcensored` now
    checkambig=False,
    mmaplargeindex=False,
    censorable=False,
    upperboundcomp=None,
    persistentnodemap=False,
    concurrencychecker=None,
    trypending=False,
    try_split=False,
    canonical_parent_order=True,
):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    `target`: a (KIND, ID) tuple that identifies the content stored in
    this revlog. It helps the rest of the code to understand what the
    revlog is about without having to resort to heuristics and index
    filename analysis. Note that this must reliably be set by normal code,
    but that test, debug, or performance measurement code might not set
    this to an accurate value.

    `radix` and `postfix` are stored as given.

    `checkambig`: when True, the index file is opened with checkambig=True
    at writing, to avoid file stat ambiguity.

    `mmaplargeindex`: when True, the `mmapindexthreshold` opener option is
    taken into account by `_init_opts`.

    `persistentnodemap`: when true, the nodemap file is resolved through
    `nodemaputil.get_nodemap_file(self)`.

    `censorable`, `upperboundcomp`, `concurrencychecker`, `trypending`,
    `try_split` and `canonical_parent_order` are stored on the instance.

    The index is loaded (`self._loadindex()`) as part of construction.
    """
    self.upperboundcomp = upperboundcomp

    self.radix = radix

    self._docket_file = None
    self._indexfile = None
    self._datafile = None
    self._sidedatafile = None
    self._nodemap_file = None
    self.postfix = postfix
    self._trypending = trypending
    self._try_split = try_split
    self.opener = opener
    if persistentnodemap:
        self._nodemap_file = nodemaputil.get_nodemap_file(self)

    assert target[0] in ALL_KINDS
    assert len(target) == 2
    self.target = target
    # When True, indexfile is opened with checkambig=True at writing, to
    # avoid file stat ambiguity.
    self._checkambig = checkambig
    self._mmaplargeindex = mmaplargeindex
    self._censorable = censorable
    # 3-tuple of (node, rev, text) for a raw revision.
    self._revisioncache = None
    # Maps rev to chain base rev.
    self._chainbasecache = util.lrucachedict(100)
    # 2-tuple of (offset, data) of raw data from the revlog at an offset.
    self._chunkcache = (0, b'')
    # How much data to read and cache into the raw revlog data cache.
    self._chunkcachesize = 65536
    self._maxchainlen = None
    self._deltabothparents = True
    self._candidate_group_chunk_size = 0
    self._debug_delta = False
    self.index = None
    self._docket = None
    self._nodemap_docket = None
    # Mapping of partial identifiers to full nodes.
    self._pcache = {}
    # Default compression engine and its options.
    self._compengine = b'zlib'
    self._compengineopts = {}
    self._maxdeltachainspan = -1
    self._withsparseread = False
    self._sparserevlog = False
    self.hassidedata = False
    self._srdensitythreshold = 0.50
    self._srmingapsize = 262144

    # other optional features

    # might remove rank configuration once the computation has no impact
    self._compute_rank = False

    # Make copy of flag processors so each revlog instance can support
    # custom flags.
    self._flagprocessors = dict(flagutil.flagprocessors)

    # 3-tuple of file handles being used for active writing.
    self._writinghandles = None
    # prevent nesting of addgroup
    self._adding_group = None

    self._loadindex()

    self._concurrencychecker = concurrencychecker

    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    self.canonical_parent_order = canonical_parent_order
407
407
def _init_opts(self):
    """process options (from above/config) to setup associated default revlog mode

    These values might be affected when actually reading on disk information.

    The relevant values are returned for use in _loadindex(), as a
    ``(new_header, mmapindexthreshold, force_nodemap)`` tuple:

    * new_header:
        version header to use if we need to create a new revlog

    * mmapindexthreshold:
        minimal index size for start to use mmap

    * force_nodemap:
        force the usage of a "development" version of the nodemap code

    Raises error.RevlogError when the configured chunk cache size is not a
    strictly positive power of two.
    """
    mmapindexthreshold = None
    opts = self.opener.options

    # Pick the header used when a brand new revlog has to be created.
    if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
        new_header = CHANGELOGV2
        self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
    elif b'revlogv2' in opts:
        new_header = REVLOGV2
    elif b'revlogv1' in opts:
        new_header = REVLOGV1 | FLAG_INLINE_DATA
        if b'generaldelta' in opts:
            new_header |= FLAG_GENERALDELTA
    elif b'revlogv0' in opts:
        new_header = REVLOGV0
    else:
        new_header = REVLOG_DEFAULT_VERSION

    if b'chunkcachesize' in opts:
        self._chunkcachesize = opts[b'chunkcachesize']
    if b'maxchainlen' in opts:
        self._maxchainlen = opts[b'maxchainlen']
    if b'deltabothparents' in opts:
        self._deltabothparents = opts[b'deltabothparents']
    dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
    if dps_cgds:
        self._candidate_group_chunk_size = dps_cgds
    self._lazydelta = bool(opts.get(b'lazydelta', True))
    # lazydeltabase is only honoured when lazydelta itself is enabled
    self._lazydeltabase = False
    if self._lazydelta:
        self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
    if b'debug-delta' in opts:
        self._debug_delta = opts[b'debug-delta']
    if b'compengine' in opts:
        self._compengine = opts[b'compengine']
    if b'zlib.level' in opts:
        self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
    if b'zstd.level' in opts:
        self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
    if b'maxdeltachainspan' in opts:
        self._maxdeltachainspan = opts[b'maxdeltachainspan']
    if self._mmaplargeindex and b'mmapindexthreshold' in opts:
        mmapindexthreshold = opts[b'mmapindexthreshold']
    self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
    withsparseread = bool(opts.get(b'with-sparse-read', False))
    # sparse-revlog forces sparse-read
    self._withsparseread = self._sparserevlog or withsparseread
    if b'sparse-read-density-threshold' in opts:
        self._srdensitythreshold = opts[b'sparse-read-density-threshold']
    if b'sparse-read-min-gap-size' in opts:
        self._srmingapsize = opts[b'sparse-read-min-gap-size']
    if opts.get(b'enableellipsis'):
        self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

    # revlog v0 doesn't have flag processors
    for flag, processor in opts.get(b'flagprocessors', {}).items():
        flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

    if self._chunkcachesize <= 0:
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not greater than 0')
            % self._chunkcachesize
        )
    elif self._chunkcachesize & (self._chunkcachesize - 1):
        # x & (x - 1) is non-zero exactly when x has more than one bit set
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not a power of 2')
            % self._chunkcachesize
        )
    force_nodemap = opts.get(b'devel-force-nodemap', False)
    return new_header, mmapindexthreshold, force_nodemap
493
493
def _get_data(self, filepath, mmap_threshold, size=None):
    """Read `filepath` through the opener, memory-mapping it when large.

    When `mmap_threshold` is not None and the file is at least that many
    bytes long, the content is returned as a buffer over a memory map;
    otherwise it is read normally. `size`, when given, caps the amount of
    data returned.

    A missing file yields the empty byte string."""
    try:
        with self.opener(filepath) as handle:
            use_mmap = False
            if mmap_threshold is not None:
                on_disk = self.opener.fstat(handle).st_size
                use_mmap = on_disk >= mmap_threshold
            if use_mmap:
                # TODO: should .close() to release resources without
                # relying on Python GC
                if size is None:
                    return util.buffer(util.mmapread(handle))
                # never map more than the file holds, to avoid a potential
                # mmap crash
                capped = min(on_disk, size)
                return util.buffer(util.mmapread(handle, capped))
            if size is None:
                return handle.read()
            return handle.read(size)
    except FileNotFoundError:
        return b''
518
518
def get_streams(self, max_linkrev, force_inline=False):
    """return a list of streams that represent this revlog

    This is used by stream-clone to do bytes to bytes copies of a repository.

    Trailing revisions whose linkrev is greater than or equal to
    `max_linkrev` are left out; everything before them is streamed.

    If `force_inline` is set, it enforces that the stream will represent an
    inline revlog.

    It returns a list of three-tuples (an empty list when there is no
    revision to send):

        [
            (filename, bytes_stream, stream_size),
            …
        ]

    Each `bytes_stream` is a generator that has already been advanced once,
    so the file (or reading context) it relies on is opened before this
    method returns.
    """
    n = len(self)
    index = self.index
    # walk back from the tip until an entry below `max_linkrev` is found;
    # `n` ends up being the number of revisions to stream.
    while n > 0:
        linkrev = index[n - 1][4]
        if linkrev < max_linkrev:
            break
        # note: this loop will rarely go through multiple iterations, since
        # it only traverses commits created during the current streaming
        # pull operation.
        #
        # If this becomes a problem, using a binary search should cap the
        # runtime of this.
        n = n - 1
    if n == 0:
        # no data to send
        return []
    index_size = n * index.entry_size
    data_size = self.end(n - 1)

    # XXX we might have been split (or stripped) since the object
    # initialization, We need to close this race too, but having a way to
    # pre-open the file we feed to the revlog and never closing them before
    # we are done streaming.

    if self._inline:
        # inline revlog: index and data live in the same file, copy its
        # first `index_size + data_size` bytes.

        def get_stream():
            with self._indexfp() as fp:
                # first value is a placeholder consumed by the `next()`
                # call below, whose only purpose is to open the file now.
                yield None
                size = index_size + data_size
                if size <= 65536:
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

        inline_stream = get_stream()
        next(inline_stream)
        return [
            (self._indexfile, inline_stream, index_size + data_size),
        ]
    elif force_inline:
        # split revlog sent as an inline one: interleave each index entry
        # with the data segment of the same revision.

        def get_stream():
            with self.reading():
                yield None

                for rev in range(n):
                    idx = self.index.entry_binary(rev)
                    if rev == 0 and self._docket is None:
                        # re-inject the inline flag
                        header = self._format_flags
                        header |= self._format_version
                        header |= FLAG_INLINE_DATA
                        header = self.index.pack_header(header)
                        idx = header + idx
                    yield idx
                    yield self._getsegmentforrevs(rev, rev)[1]

        inline_stream = get_stream()
        next(inline_stream)
        return [
            (self._indexfile, inline_stream, index_size + data_size),
        ]
    else:
        # split revlog sent as is: one stream per file.

        def get_index_stream():
            with self._indexfp() as fp:
                yield None
                if index_size <= 65536:
                    yield fp.read(index_size)
                else:
                    yield from util.filechunkiter(fp, limit=index_size)

        def get_data_stream():
            with self._datafp() as fp:
                yield None
                if data_size <= 65536:
                    yield fp.read(data_size)
                else:
                    yield from util.filechunkiter(fp, limit=data_size)

        index_stream = get_index_stream()
        next(index_stream)
        data_stream = get_data_stream()
        next(data_stream)
        # the data file is listed before the index file
        return [
            (self._datafile, data_stream, data_size),
            (self._indexfile, index_stream, index_size),
        ]
625
625
def _loadindex(self, docket=None):
    """Load the index of this revlog and set up the related attributes.

    The entry point file is selected from `self.postfix`, the pending
    (`.i.a`) file and the split index file. Unless a `docket` is passed
    in, its header is decoded and validated, and the format dependent
    attributes (`_inline`, `_generaldelta`, `hassidedata`, ...) are
    derived from it. The index data is then parsed and `self.index`, the
    segment file readers and the caches are initialised.

    Raises `error.RevlogError` when the version or flags are unknown, when
    the docket announces more index data than can be read, or when the
    index cannot be parsed.
    """

    new_header, mmapindexthreshold, force_nodemap = self._init_opts()

    # pick the file holding the header (or the docket) of this revlog
    if self.postfix is not None:
        entry_point = b'%s.i.%s' % (self.radix, self.postfix)
    elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
        entry_point = b'%s.i.a' % self.radix
    elif self._try_split and self.opener.exists(self._split_index_file):
        entry_point = self._split_index_file
    else:
        entry_point = b'%s.i' % self.radix

    if docket is not None:
        self._docket = docket
        self._docket_file = entry_point
    else:
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            # nothing on disk yet: use the header computed from the options
            header = new_header

        # low 16 bits hold the version, the remaining bits hold the flags
        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            # the entry point is the index itself
            self._indexfile = entry_point
            index_data = entry_data
        else:
            # the entry point is a docket pointing to the actual files
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )

    if self._docket is not None:
        self._indexfile = self._docket.index_filepath()
        index_data = b''
        index_size = self._docket.index_end
        if index_size > 0:
            index_data = self._get_data(
                self._indexfile, mmapindexthreshold, size=index_size
            )
            if len(index_data) < index_size:
                msg = _(b'too few index data for %s: got %d, expected %d')
                msg %= (self.display_id, len(index_data), index_size)
                raise error.RevlogError(msg)

        self._inline = False
        # generaldelta implied by version 2 revlogs.
        self._generaldelta = True
        # the logic for persistent nodemap will be dealt with within the
        # main docket, so disable it for now.
        self._nodemap_file = None

    if self._docket is not None:
        self._datafile = self._docket.data_filepath()
        self._sidedatafile = self._docket.sidedata_filepath()
    elif self.postfix is None:
        self._datafile = b'%s.d' % self.radix
    else:
        self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

    self.nodeconstants = sha1nodeconstants
    self.nullid = self.nodeconstants.nullid

    # sparse-revlog can't be on without general-delta (issue6056)
    if not self._generaldelta:
        self._sparserevlog = False

    self._storedeltachains = True

    devel_nodemap = (
        self._nodemap_file
        and force_nodemap
        and parse_index_v1_nodemap is not None
    )

    use_rust_index = False
    if rustrevlog is not None:
        if self._nodemap_file is not None:
            use_rust_index = True
        else:
            use_rust_index = self.opener.options.get(b'rust.index')

    # select the index parser; the version specific parsers take
    # precedence over the nodemap and rust variants of the v1 parser.
    self._parse_index = parse_index_v1
    if self._format_version == REVLOGV0:
        self._parse_index = revlogv0.parse_index_v0
    elif self._format_version == REVLOGV2:
        self._parse_index = parse_index_v2
    elif self._format_version == CHANGELOGV2:
        self._parse_index = parse_index_cl_v2
    elif devel_nodemap:
        self._parse_index = parse_index_v1_nodemap
    elif use_rust_index:
        self._parse_index = parse_index_v1_mixed
    try:
        d = self._parse_index(index_data, self._inline)
        index, chunkcache = d
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                # note: this rebinds the `docket` argument to the nodemap
                # docket; the argument is not used past this point.
                docket = nodemap_data[0]
                # only trust the persisted nodemap if its recorded tip
                # still matches the index
                if (
                    len(d[0]) > docket.tip_rev
                    and d[0][docket.tip_rev][7] == docket.tip_node
                ):
                    # no changelog tampering
                    self._nodemap_docket = docket
                    index.update_nodemap_data(*nodemap_data)
    except (ValueError, IndexError):
        raise error.RevlogError(
            _(b"index %s is corrupted") % self.display_id
        )
    self.index = index
    # revision data is read from the index file for inline revlogs and
    # from the data file otherwise
    self._segmentfile = randomaccessfile.randomaccessfile(
        self.opener,
        (self._indexfile if self._inline else self._datafile),
        self._chunkcachesize,
        chunkcache,
    )
    self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
        self.opener,
        self._sidedatafile,
        self._chunkcachesize,
    )
    # revnum -> (chain-length, sum-delta-length)
    self._chaininfocache = util.lrucachedict(500)
    # revlog header -> revlog compressor
    self._decompressors = {}
782
782
def get_revlog(self):
    """Return this very object.

    Provided so a revlog exposes the same `get_revlog()` entry point as
    the objects that are not really revlogs."""
    return self
786
786
@util.propertycache
def revlog_kind(self):
    """Kind of this revlog: the first item of `self.target`."""
    kind = self.target[0]
    return kind
790
790
@util.propertycache
def display_id(self):
    """Identifier of this revlog as shown in user facing messages."""
    if self.revlog_kind != KIND_FILELOG:
        return self.radix
    # For a filelog, use the tracked file name rather than the storage
    # path with its "data/" prefix: that is what the user knows.
    return self.target[1]
800
800
def _get_decompressor(self, t):
    """Return the compressor object matching the revlog header `t`.

    Objects are created on first use and kept in `self._decompressors`.
    Raises `error.RevlogError` when no engine is known for `t`."""
    known = self._decompressors
    if t in known:
        return known[t]
    try:
        engine = util.compengines.forrevlogheader(t)
        found = engine.revlogcompressor(self._compengineopts)
        known[t] = found
    except KeyError:
        raise error.RevlogError(
            _(b'unknown compression type %s') % binascii.hexlify(t)
        )
    return found
814
814
@util.propertycache
def _compressor(self):
    """Compressor built from the engine registered as `self._compengine`."""
    return util.compengines[self._compengine].revlogcompressor(
        self._compengineopts
    )
819
819
@util.propertycache
def _decompressor(self):
    """Default decompression function, or None without a docket.

    It is the `decompress` method of the compressor matching the
    docket's default compression header."""
    docket = self._docket
    if docket is None:
        return None
    default = self._get_decompressor(docket.default_compression_header)
    return default.decompress
828
828
def _indexfp(self):
    """Open the index file of this revlog for reading."""
    path = self._indexfile
    return self.opener(path, mode=b"r")
832
832
def __index_write_fp(self):
    # Internal helper, go through `_writing` rather than calling this.
    #
    # Opens the index file for update, positioned where new entries must
    # be written: the end of the file, or the end recorded in the docket
    # when there is one. A missing file is created instead.
    try:
        handle = self.opener(
            self._indexfile, mode=b"r+", checkambig=self._checkambig
        )
        docket = self._docket
        if docket is not None:
            handle.seek(docket.index_end, os.SEEK_SET)
        else:
            handle.seek(0, os.SEEK_END)
    except FileNotFoundError:
        return self.opener(
            self._indexfile, mode=b"w+", checkambig=self._checkambig
        )
    return handle
848
848
def __index_new_fp(self):
    # Only meant for the upgrade from an inline revlog: opens the index
    # file for writing as an atomic temporary file.
    options = {
        'mode': b"w",
        'checkambig': self._checkambig,
        'atomictemp': True,
    }
    return self.opener(self._indexfile, **options)
857
857
def _datafp(self, mode=b'r'):
    """Open the data file of this revlog with the given `mode`."""
    path = self._datafile
    return self.opener(path, mode=mode)
861
861
@contextlib.contextmanager
def _sidedatareadfp(self):
    """Context manager giving a file object to read sidedata from.

    While writing handles are set, the third one is reused and left
    open; otherwise the sidedata file is opened for the duration of the
    context."""
    if not self._writinghandles:
        with self.opener(self._sidedatafile) as fp:
            yield fp
        return
    yield self._writinghandles[2]
870
870
def tiprev(self):
    """Return the number of the last revision (-1 for an empty index)."""
    count = len(self.index)
    return count - 1
873
873
def tip(self):
    """Return the node of the last revision."""
    last = self.tiprev()
    return self.node(last)
876
876
def __contains__(self, rev):
    """Tell whether `rev` is a revision number stored in this revlog."""
    return 0 <= rev and rev < len(self)
879
879
def __len__(self):
    """Number of entries in the index."""
    entries = self.index
    return len(entries)
882
882
def __iter__(self):
    """Iterate over the revision numbers, from 0 to the last one."""
    count = len(self)
    return iter(range(count))
885
885
def revs(self, start=0, stop=None):
    """Iterate over the revisions of this revlog, from `start` to `stop`."""
    total = len(self)
    return storageutil.iterrevs(total, start=start, stop=stop)
889
889
def hasnode(self, node):
    """Tell whether `node` can be resolved by `self.rev`.

    A `KeyError` raised by the lookup means the node is unknown."""
    try:
        self.rev(node)
    except KeyError:
        return False
    else:
        return True
896
896
def _candelta(self, baserev, rev):
    """Tell whether a delta between `baserev` and `rev` is allowed."""
    # A revision carrying a flag whose processor changes the content (ex.
    # LFS) cannot take part in a delta: the processor may alter the
    # rawtext the delta is computed against, and two clients may hold the
    # same node with different flags (hence different rawtexts), which
    # would make the delta incompatible.
    for candidate in (baserev, rev):
        if self.flags(candidate) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
    return True
909
909
def update_caches(self, transaction):
    """Refresh the caches kept on disk.

    Nothing happens unless a nodemap file is configured. With a
    transaction the update may be postponed until it is committed,
    without one it is done right away."""
    if self._nodemap_file is None:
        return
    if transaction is not None:
        nodemaputil.setup_persistent_nodemap(transaction, self)
    else:
        nodemaputil.update_persistent_nodemap(self)
920
920
def clearcaches(self):
    """Drop every in-memory cache held by this revlog."""
    self._revisioncache = None
    self._chainbasecache.clear()
    self._segmentfile.clear_cache()
    self._segmentfile_sidedata.clear_cache()
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()

    # The python code is the one responsible for validating the docket, we
    # end up having to refresh it here.
    if (
        self._inline
        or self._nodemap_file is None
        or not hasattr(self.index, 'update_nodemap_data')
    ):
        return
    persisted = nodemaputil.persisted_data(self)
    if persisted is None:
        return
    self._nodemap_docket = persisted[0]
    self.index.update_nodemap_data(*persisted)
942
942
def rev(self, node):
    """Return the revision number the index associates with `node`.

    Raises `error.WdirUnsupported` for the working directory identifiers
    and `error.LookupError` for any other node the index does not know.
    A `TypeError` from the index is propagated untouched."""
    try:
        return self.index.rev(node)
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        constants = self.nodeconstants
        is_wdir = (
            node == constants.wdirid or node in constants.wdirfilenodeids
        )
        if is_wdir:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.display_id, _(b'no node'))
957
957
958 # Accessors for index entries.
958 # Accessors for index entries.
959
959
960 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
960 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
961 # are flags.
961 # are flags.
def start(self, rev):
    """Return the data offset of `rev`.

    It is the first field of the index entry once its 16 low bits (the
    flags) are shifted out."""
    packed = self.index[rev][0]
    return int(packed >> 16)
964
964
def sidedata_cut_off(self, rev):
    """Return the sidedata offset to use for `rev`.

    This is the offset stored in the entry when it is not zero, otherwise
    the end of the closest sidedata at or before `rev`, or 0 when there
    is none."""
    recorded = self.index[rev][8]
    if recorded != 0:
        return recorded
    # This is some annoying dance, because entries without sidedata
    # currently use 0 as their offset. (instead of previous-offset +
    # previous-size)
    #
    # We should reconsider this "no sidedata → 0 sidedata_offset" policy.
    # In the meantime, we need this.
    for candidate in range(rev, -1, -1):
        entry = self.index[candidate]
        if entry[9] != 0:
            return entry[8] + entry[9]
    return 0
981
981
def flags(self, rev):
    """Return the flags of `rev`: the 16 low bits of its first field."""
    packed = self.index[rev][0]
    return packed & 0xFFFF
984
984
def length(self, rev):
    """Return the second field of the index entry of `rev`."""
    entry = self.index[rev]
    return entry[1]
987
987
def sidedata_length(self, rev):
    """Return the sidedata size recorded for `rev`.

    Always 0 when this revlog has no sidedata support."""
    if self.hassidedata:
        return self.index[rev][9]
    return 0
992
992
def rawsize(self, rev):
    """Return the length of the uncompressed text of `rev`.

    The value stored in the index is used unless it is negative, in
    which case the raw data is fetched and measured."""
    stored = self.index[rev][2]
    if stored < 0:
        return len(self.rawdata(rev))
    return stored
1001
1001
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    rev_flags = self.flags(rev)
    # ELLIPSIS is known to not change the content, so it is left out of
    # the flags that force the slow path.
    content_changing = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
    if rev_flags & content_changing:
        # a "read" flag processor may alter the text: measure the result
        return len(self.revision(rev))
    # fast path: the processed text is the raw text
    return self.rawsize(rev)
1011
1011
def fast_rank(self, rev):
    """Return the rank of a revision if already known, or None otherwise.

    The rank of a revision `r` is the size of the set `ancestors(r)`, `r`
    included; in other words the size of the sub-graph it defines as a
    head.

    The value is only read from the index, in constant time. Nothing is
    computed for revlog versions that do not persist the rank.
    """
    rank = self.index[rev][ENTRY_RANK]
    known = self._format_version == CHANGELOGV2 and rank != RANK_UNKNOWN
    if not known:
        return None
    # the null revision has rank 0 by convention
    return 0 if rev == nullrev else rank
1029
1029
def chainbase(self, rev):
    """Return the revision at the bottom of the delta chain of `rev`.

    The chain is followed through the fourth field of the index entries
    until an entry that refers to itself is found. Results are kept in
    `self._chainbasecache`."""
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    entries = self.index
    current = rev
    while True:
        parent = entries[current][3]
        if parent == current:
            break
        current = parent

    self._chainbasecache[rev] = parent
    return parent
1044
1044
def linkrev(self, rev):
    """Return field 4 of the index entry for ``rev`` (its link revision)."""
    entry = self.index[rev]
    return entry[4]
1047
1047
1048 def parentrevs(self, rev):
1048 def parentrevs(self, rev):
1049 try:
1049 try:
1050 entry = self.index[rev]
1050 entry = self.index[rev]
1051 except IndexError:
1051 except IndexError:
1052 if rev == wdirrev:
1052 if rev == wdirrev:
1053 raise error.WdirUnsupported
1053 raise error.WdirUnsupported
1054 raise
1054 raise
1055
1055
1056 if self.canonical_parent_order and entry[5] == nullrev:
1056 if self.canonical_parent_order and entry[5] == nullrev:
1057 return entry[6], entry[5]
1057 return entry[6], entry[5]
1058 else:
1058 else:
1059 return entry[5], entry[6]
1059 return entry[5], entry[6]
1060
1060
1061 # fast parentrevs(rev) where rev isn't filtered
1061 # fast parentrevs(rev) where rev isn't filtered
1062 _uncheckedparentrevs = parentrevs
1062 _uncheckedparentrevs = parentrevs
1063
1063
def node(self, rev):
    """Return field 7 of the index entry for ``rev`` (its node id).

    Raises ``error.WdirUnsupported`` if the lookup fails and ``rev`` is
    ``wdirrev``; any other ``IndexError`` is propagated unchanged.
    """
    try:
        nodeid = self.index[rev][7]
    except IndexError:
        if rev != wdirrev:
            raise
        raise error.WdirUnsupported
    return nodeid
1071
1071
1072 # Derived from index values.
1072 # Derived from index values.
1073
1073
def end(self, rev):
    """Return ``self.start(rev) + self.length(rev)``."""
    begin = self.start(rev)
    size = self.length(rev)
    return begin + size
1076
1076
def parents(self, node):
    """Return the node ids of the two parents of ``node``.

    ``node`` is resolved to a revision with ``self.rev``; the parent
    revisions are read from fields 5 and 6 of its index entry and each is
    mapped back to a node id through field 7 of the parent's entry.

    When ``self.canonical_parent_order`` is set and the first parent is
    null, the pair is returned swapped so the null parent comes second,
    mirroring ``parentrevs``.
    """
    index = self.index
    entry = index[self.rev(node)]
    p1, p2 = entry[5], entry[6]
    # Fields 5 and 6 hold parent *revision numbers* (they are used as index
    # keys just below), so a null first parent has to be detected with
    # ``nullrev``. Comparing against ``self.nullid`` (a node id) could never
    # match, which silently disabled the canonical ordering here.
    if self.canonical_parent_order and p1 == nullrev:
        p1, p2 = p2, p1
    # inline node() to avoid function call overhead
    return index[p1][7], index[p2][7]
1085
1085
def chainlen(self, rev):
    """Return the first element of ``self._chaininfo(rev)`` (chain length)."""
    info = self._chaininfo(rev)
    return info[0]
1088
1088
1089 def _chaininfo(self, rev):
1089 def _chaininfo(self, rev):
1090 chaininfocache = self._chaininfocache
1090 chaininfocache = self._chaininfocache
1091 if rev in chaininfocache:
1091 if rev in chaininfocache:
1092 return chaininfocache[rev]
1092 return chaininfocache[rev]
1093 index = self.index
1093 index = self.index
1094 generaldelta = self._generaldelta
1094 generaldelta = self._generaldelta
1095 iterrev = rev
1095 iterrev = rev
1096 e = index[iterrev]
1096 e = index[iterrev]
1097 clen = 0
1097 clen = 0
1098 compresseddeltalen = 0
1098 compresseddeltalen = 0
1099 while iterrev != e[3]:
1099 while iterrev != e[3]:
1100 clen += 1
1100 clen += 1
1101 compresseddeltalen += e[1]
1101 compresseddeltalen += e[1]
1102 if generaldelta:
1102 if generaldelta:
1103 iterrev = e[3]
1103 iterrev = e[3]
1104 else:
1104 else:
1105 iterrev -= 1
1105 iterrev -= 1
1106 if iterrev in chaininfocache:
1106 if iterrev in chaininfocache:
1107 t = chaininfocache[iterrev]
1107 t = chaininfocache[iterrev]
1108 clen += t[0]
1108 clen += t[0]
1109 compresseddeltalen += t[1]
1109 compresseddeltalen += t[1]
1110 break
1110 break
1111 e = index[iterrev]
1111 e = index[iterrev]
1112 else:
1112 else:
1113 # Add text length of base since decompressing that also takes
1113 # Add text length of base since decompressing that also takes
1114 # work. For cache hits the length is already included.
1114 # work. For cache hits the length is already included.
1115 compresseddeltalen += e[1]
1115 compresseddeltalen += e[1]
1116 r = (clen, compresseddeltalen)
1116 r = (clen, compresseddeltalen)
1117 chaininfocache[rev] = r
1117 chaininfocache[rev] = r
1118 return r
1118 return r
1119
1119
1120 def _deltachain(self, rev, stoprev=None):
1120 def _deltachain(self, rev, stoprev=None):
1121 """Obtain the delta chain for a revision.
1121 """Obtain the delta chain for a revision.
1122
1122
1123 ``stoprev`` specifies a revision to stop at. If not specified, we
1123 ``stoprev`` specifies a revision to stop at. If not specified, we
1124 stop at the base of the chain.
1124 stop at the base of the chain.
1125
1125
1126 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1126 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1127 revs in ascending order and ``stopped`` is a bool indicating whether
1127 revs in ascending order and ``stopped`` is a bool indicating whether
1128 ``stoprev`` was hit.
1128 ``stoprev`` was hit.
1129 """
1129 """
1130 # Try C implementation.
1130 # Try C implementation.
1131 try:
1131 try:
1132 return self.index.deltachain(rev, stoprev, self._generaldelta)
1132 return self.index.deltachain(rev, stoprev, self._generaldelta)
1133 except AttributeError:
1133 except AttributeError:
1134 pass
1134 pass
1135
1135
1136 chain = []
1136 chain = []
1137
1137
1138 # Alias to prevent attribute lookup in tight loop.
1138 # Alias to prevent attribute lookup in tight loop.
1139 index = self.index
1139 index = self.index
1140 generaldelta = self._generaldelta
1140 generaldelta = self._generaldelta
1141
1141
1142 iterrev = rev
1142 iterrev = rev
1143 e = index[iterrev]
1143 e = index[iterrev]
1144 while iterrev != e[3] and iterrev != stoprev:
1144 while iterrev != e[3] and iterrev != stoprev:
1145 chain.append(iterrev)
1145 chain.append(iterrev)
1146 if generaldelta:
1146 if generaldelta:
1147 iterrev = e[3]
1147 iterrev = e[3]
1148 else:
1148 else:
1149 iterrev -= 1
1149 iterrev -= 1
1150 e = index[iterrev]
1150 e = index[iterrev]
1151
1151
1152 if iterrev == stoprev:
1152 if iterrev == stoprev:
1153 stopped = True
1153 stopped = True
1154 else:
1154 else:
1155 chain.append(iterrev)
1155 chain.append(iterrev)
1156 stopped = False
1156 stopped = False
1157
1157
1158 chain.reverse()
1158 chain.reverse()
1159 return chain, stopped
1159 return chain, stopped
1160
1160
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered: looking each one
    # up through node() is the check
    revs = list(revs)
    for startrev in revs:
        self.node(startrev)
    # and we're sure ancestors aren't filtered as well

    userust = rustancestor is not None and self.index.rust_ext_compat
    if userust:
        factory, source = rustancestor.LazyAncestors, self.index
    else:
        factory, source = ancestor.lazyancestors, self._uncheckedparentrevs
    return factory(source, revs, stoprev=stoprev, inclusive=inclusive)
1181
1181
def descendants(self, revs):
    """Return ``dagop.descendantrevs`` applied to ``revs`` for this revlog.

    ``self.revs`` and ``self.parentrevs`` are handed over as the revision
    walker and the parent lookup function.
    """
    walker = self.revs
    parentfn = self.parentrevs
    return dagop.descendantrevs(revs, walker, parentfn)
1184
1184
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset:
        """Set-like view: explicitly added values plus a lazy iterable."""

        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            if value in self.addedvalues:
                return True
            return value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            yield from added
            for r in self.lazyvalues:
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    pending = collections.deque(r for r in heads if r not in has)
    while pending:
        r = pending.popleft()
        if r in missing:
            continue
        missing.add(r)
        pending.extend(p for p in self.parentrevs(r) if p not in has)

    return has, [self.node(miss) for miss in sorted(missing)]
1248
1248
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common.

    The object is built by ``rustancestor.MissingAncestors`` when the Rust
    module is available and the index reports ``rust_ext_compat``;
    otherwise by ``ancestor.incrementalmissingancestors``.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    if rustancestor is None or not self.index.rust_ext_compat:
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
    return rustancestor.MissingAncestors(self.index, common)
1264
1264
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers.  If heads is
    not supplied, uses all of the revlog's head revisions
    (``self.headrevs()``).  If common is not supplied, uses nullrev."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    # The actual computation is delegated to the incremental helper object.
    inc = self.incrementalmissingrevs(common=common)
    return inc.missingancestors(heads)
1288
1288
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    # Work on revision numbers internally, convert back on the way out.
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=commonrevs)
    missingrevs = inc.missingancestors(headrevs)
    return [self.node(r) for r in missingrevs]
1314
1314
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads."""
    nonodes = ([], [], [])

    if roots is None:
        roots = [self.nullid]  # Everybody's a descendant of nullid
        lowestrev = nullrev
    else:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min(self.rev(n) for n in roots)

    if heads is None and lowestrev == nullrev:
        # We want _all_ the nodes!
        allnodes = [self.node(r) for r in self]
        return (allnodes, [self.nullid], list(self.heads()))

    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # ``None`` signals that every node counts as an ancestor.
        ancestors = None
        # Heads get discovered later, start from an empty dictionary.
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max(self.rev(n) for n in nodestotag)
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == self.nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes; anything below the
            # lowest root cannot be a descendant of a root.
            if self.rev(n) < lowestrev:
                continue
            if n in ancestors:
                # We've seen it before, is it a fake head?  Real heads
                # should not be the ancestors of any other heads.
                if n in heads:
                    heads.pop(n)
                continue
            # Possibly a descendant of one of the roots and not yet
            # marked: mark as ancestor and queue its non-nullid parents.
            ancestors.add(n)
            nodestotag.update(
                p for p in self.parents(n) if p != self.nullid
            )
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            if not roots:
                # No more roots?  Return empty list
                return nonodes
            # Recompute the lowest revision
            lowestrev = min(self.rev(root) for root in roots)
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [self.nullid]

    # Nodes known to descend from a root; seeded with the roots themselves.
    descendants = set(roots)
    # Separate copy of the roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    realroots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []

    def hasdescendantparent(nodeid):
        # True when either parent of ``nodeid`` is a known descendant.
        ps = tuple(self.parents(nodeid))
        return (ps[0] in descendants) or (ps[1] in descendants)

    fromnull = lowestrev == nullrev
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        if fromnull:
            # Everybody is a descendant of nullid
            isdescendant = True
        elif n in descendants:
            # n is already a descendant
            isdescendant = True
            # This check only needs to be done here because all the roots
            # start out marked as descendants before the loop.  A root
            # with a descendant parent is not a 'real' root.
            if n in realroots and hasdescendantparent(n):
                realroots.remove(n)
        elif hasdescendantparent(n):
            # A node is a descendant if either of its parents is one.
            descendants.add(n)
            isdescendant = True
        else:
            isdescendant = False

        # Only include nodes that are both descendants and ancestors.
        if not isdescendant:
            continue
        if ancestors is not None and n not in ancestors:
            continue
        orderedout.append(n)
        if ancestors is None:
            # We're trying to discover the heads.  Assume this is a head
            # because if it isn't, a later iteration will remove it.
            heads[n] = True
            # But, obviously its parents aren't.
            for p in self.parents(n):
                heads.pop(p, None)
        elif n in heads:
            # We're trying to figure out which heads are reachable from
            # roots: mark this head as having been reached.
            heads[n] = True

    outheads = [head for head, flag in heads.items() if flag]
    outroots = list(realroots)
    assert orderedout
    assert outroots
    assert outheads
    return (orderedout, outroots, outheads)
1474
1474
def headrevs(self, revs=None):
    """Return head revisions, of the whole revlog or of the subset ``revs``.

    Without ``revs`` the index's own ``headrevs()`` is used, falling back
    to ``self._headrevs()`` when the index has no such method. With
    ``revs`` the work goes to ``rustdagop.headrevs`` when the Rust module
    is available and the index reports ``rust_ext_compat``, otherwise to
    ``dagop.headrevs``.
    """
    if revs is not None:
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    try:
        return self.index.headrevs()
    except AttributeError:
        return self._headrevs()
1484
1484
def computephases(self, roots):
    """Return ``self.index.computephasesmapsets(roots)``."""
    index = self.index
    return index.computephasesmapsets(roots)
1487
1487
1488 def _headrevs(self):
1488 def _headrevs(self):
1489 count = len(self)
1489 count = len(self)
1490 if not count:
1490 if not count:
1491 return [nullrev]
1491 return [nullrev]
1492 # we won't iter over filtered rev so nobody is a head at start
1492 # we won't iter over filtered rev so nobody is a head at start
1493 ishead = [0] * (count + 1)
1493 ishead = [0] * (count + 1)
1494 index = self.index
1494 index = self.index
1495 for r in self:
1495 for r in self:
1496 ishead[r] = 1 # I may be an head
1496 ishead[r] = 1 # I may be an head
1497 e = index[r]
1497 e = index[r]
1498 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1498 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1499 return [r for r, val in enumerate(ishead) if val]
1499 return [r for r, val in enumerate(ishead) if val]
1500
1500
def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children
    """
    if start is None and stop is None:
        # unrestricted query: answer straight from headrevs()
        if not len(self):
            return [self.nullid]
        return [self.node(r) for r in self.headrevs()]

    startrev = nullrev if start is None else self.rev(start)
    stoprevs = {self.rev(n) for n in stop or []}

    subset = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
    )
    return [self.node(rev) for rev in subset]
1526
1526
def children(self, node):
    """find the children of a given node"""
    found = []
    target = self.rev(node)
    # children can only come after their parent in revision order
    for r in self.revs(start=target + 1):
        realparents = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if not realparents:
            # a revision without real parents is a child of the null rev
            if target == nullrev:
                found.append(self.node(r))
            continue
        for pr in realparents:
            if pr == target:
                found.append(self.node(r))
    return found
1540
1540
def commonancestorsheads(self, a, b):
    """Return the nodes heading the common ancestors of nodes a and b."""
    reva = self.rev(a)
    revb = self.rev(b)
    headrevs = self._commonancestorsheads(reva, revb)
    return pycompat.maplist(self.node, headrevs)
1546
1546
def _commonancestorsheads(self, *revs):
    """Return the revisions heading the common ancestors of ``revs``."""
    try:
        return self.index.commonancestorsheads(*revs)
    except (AttributeError, OverflowError):
        # the index implementation is unavailable or failed: use the
        # generic algorithm driven by parentrevs instead
        return ancestor.commonancestorsheads(self.parentrevs, *revs)
1554
1554
def isancestor(self, a, b):
    """Tell whether node a is an ancestor of node b.

    A revision counts as an ancestor of itself."""
    reva = self.rev(a)
    revb = self.rev(b)
    return self.isancestorrev(reva, revb)
1561
1561
def isancestorrev(self, a, b):
    """Tell whether revision a is an ancestor of revision b.

    A revision counts as an ancestor of itself.

    The cheap cases are answered directly; everything else is delegated
    to reachableroots."""
    if a == nullrev or a == b:
        return True
    if a > b:
        # a revision number above b cannot be an ancestor of b
        return False
    reached = self.reachableroots(a, [b], [a], includepath=False)
    return bool(reached)
1576
1576
def reachableroots(self, minroot, heads, roots, includepath=False):
    """Compute (heads(::(<roots> and <roots>::<heads>))).

    With includepath set to True, compute (<roots>::<heads>) instead."""
    try:
        reached = self.index.reachableroots2(
            minroot, heads, roots, includepath
        )
    except AttributeError:
        # no index-level implementation: fall back to the dagop one
        reached = dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )
    return reached
1589
1589
def ancestor(self, a, b):
    """Return the "best" common ancestor of nodes a and b.

    Returns ``self.nullid`` when no common ancestor is found."""
    reva, revb = self.rev(a), self.rev(b)
    try:
        candidates = self.index.ancestors(reva, revb)
    except (AttributeError, OverflowError):
        candidates = ancestor.ancestors(self.parentrevs, reva, revb)
    if not candidates:
        return self.nullid
    # several candidates may tie: always pick the smallest node so the
    # answer is stable
    return min(self.node(rev) for rev in candidates)
1602
1602
def _match(self, id):
    """Resolve ``id`` to a node when it identifies a revision exactly.

    ``id`` may be an integer revision, a binary node, the bytes form of a
    revision number (negative values count from the end) or a full hex
    node. Returns None when none of these interpretations matches.
    """
    if isinstance(id, int):
        # rev
        return self.node(id)

    nodelen = self.nodeconstants.nodelen
    if len(id) == nodelen:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            candidate = id
            self.rev(candidate)  # quick search the index
            return candidate
        except error.LookupError:
            pass  # may be partial hex id

    try:
        # str(rev)
        revnum = int(id)
        if b"%d" % revnum != id:
            # reject non canonical spellings of the number
            raise ValueError
        if revnum < 0:
            revnum = len(self) + revnum
        if not (0 <= revnum < len(self)):
            raise ValueError
        return self.node(revnum)
    except (ValueError, OverflowError):
        pass

    if len(id) == 2 * self.nodeconstants.nodelen:
        try:
            # a full hex nodeid?
            candidate = bin(id)
            self.rev(candidate)
            return candidate
        except (binascii.Error, error.LookupError):
            pass
    return None
1636
1636
def _partialmatch(self, id):
    """Resolve the hex prefix ``id`` to a single node.

    Returns the matching node, or None when nothing matches.

    Raises ``error.AmbiguousPrefixLookupError`` when the prefix cannot be
    narrowed down to one node, and ``error.WdirUnsupported`` when the
    prefix matches the working directory hex id and no stored node.
    """
    # we don't care wdirfilenodeids as they should be always full hash
    maybewdir = self.nodeconstants.wdirhex.startswith(id)
    ambiguous = False
    try:
        # first attempt: ask the index directly
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                ambiguous = True
            else:
                return partial
        elif maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        else:
            return None
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            ambiguous = True
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key is not hex
        pass
    if ambiguous:
        raise error.AmbiguousPrefixLookupError(
            id, self.display_id, _(b'ambiguous identifier')
        )

    # answers of the slow path below are remembered in self._pcache
    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        # hex(node)[:...]
        l = len(id) // 2 * 2  # grab an even number of digits
        try:
            # we're dropping the last digit, so let's check that it's hex,
            # to avoid the expensive computation below if it's not
            if len(id) % 2 > 0:
                if not (id[-1] in hexdigits):
                    return None
            prefix = bin(id[:l])
        except binascii.Error:
            pass
        else:
            # coarse filter on the binary prefix; e[7] is used here as the
            # binary node of each index entry
            nl = [e[7] for e in self.index if e[7].startswith(prefix)]
            # exact filter on the full hex prefix (covers the odd trailing
            # digit), keeping only nodes accepted by self.hasnode()
            nl = [
                n for n in nl if hex(n).startswith(id) and self.hasnode(n)
            ]
            if self.nodeconstants.nullhex.startswith(id):
                nl.append(self.nullid)
            if len(nl) > 0:
                if len(nl) == 1 and not maybewdir:
                    self._pcache[id] = nl[0]
                    return nl[0]
                # several candidates, or a single one that collides with
                # the wdir hex id
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            if maybewdir:
                raise error.WdirUnsupported
            return None
1700
1700
def lookup(self, id):
    """Find the node designated by ``id``, which can be:
    - a revision number or str(revision number)
    - a nodeid or a prefix of a hex nodeid

    Raises ``error.LookupError`` when nothing matches.
    """
    exact = self._match(id)
    if exact is not None:
        return exact

    partial = self._partialmatch(id)
    if not partial:
        raise error.LookupError(id, self.display_id, _(b'no match found'))
    return partial
1714
1714
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node.

    The returned prefix is a slice of ``hex(node)`` that is at least
    ``minlength`` characters long. ``error.LookupError`` is raised when
    the node cannot be found.
    """

    def isvalid(prefix):
        """Tell whether ``prefix`` resolves without ambiguity."""
        try:
            matchednode = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if matchednode is None:
            raise error.LookupError(node, self.display_id, _(b'no node'))
        return True

    def maybewdir(prefix):
        """Tell whether ``prefix`` is made only of 'f' characters."""
        return all(c == b'f' for c in pycompat.iterbytestr(prefix))

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        # grow the prefix until it is no longer made only of 'f'
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    # without filtered revisions, the index can answer directly
    if not getattr(self, 'filteredrevs', None):
        try:
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except error.RevlogError:
            # for the wdir id, fall through to the generic code below
            if node != self.nodeconstants.wdirid:
                raise error.LookupError(
                    node, self.display_id, _(b'no node')
                )
        except AttributeError:
            # Fall through to pure code
            pass

    if node == self.nodeconstants.wdirid:
        # the wdir id is all 'f', so disambiguate() is not applied here
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    # generic path: try each prefix length through _partialmatch
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)
1765
1765
def cmp(self, node, text):
    """Compare ``text`` with the content of a given file revision.

    Returns True when ``text`` differs from what is stored.
    """
    # the hash of a revision depends on its text and both parents, so a
    # mismatching hash means a different content
    parents = self.parents(node)
    p1, p2 = parents
    computed = storageutil.hashrevisionsha1(text, p1, p2)
    return computed != node
1773
1773
def _getsegmentforrevs(self, startrev, endrev, df=None):
    """Read the raw data spanning a range of revisions.

    Takes the first and last revision of the range and, optionally, an
    already-open file handle to read from. When that handle is read, its
    seek position is not preserved.

    The request may be served from a cache.

    Returns an ``(offset, data)`` pair: ``offset`` is the integer offset
    from the beginning of the revlog and ``data`` is a str or buffer
    holding the raw bytes.

    Callers must use ``self.start(rev)`` and ``self.length(rev)`` to find
    where the data of each revision begins and ends.
    """
    # self.start(startrev) and self.end(endrev) are inlined on purpose:
    # function calls are expensive on this path.
    index = self.index
    firstentry = index[startrev]
    lastentry = firstentry if startrev == endrev else index[endrev]

    start = int(firstentry[0] >> 16)
    end = int(lastentry[0] >> 16) + lastentry[1]

    if self._inline:
        # inline revlogs interleave index entries with the data
        entrysize = self.index.entry_size
        start += (startrev + 1) * entrysize
        end += (endrev + 1) * entrysize

    data = self._segmentfile.read_chunk(start, end - start, df)
    return start, data
1807
1807
def _chunk(self, rev, df=None):
    """Return the decompressed chunk stored for one revision.

    Takes an integer revision and, optionally, an already-open file
    handle to read from. When that handle is used, its seek position is
    not preserved.

    Returns a str holding the uncompressed data of the revision.
    """
    mode = self.index[rev][10]
    segment = self._getsegmentforrevs(rev, rev, df=df)[1]
    if mode == COMP_MODE_PLAIN:
        # stored as is, nothing to decompress
        return segment
    if mode == COMP_MODE_DEFAULT:
        return self._decompressor(segment)
    if mode == COMP_MODE_INLINE:
        return self.decompress(segment)
    raise error.RevlogError(b'unknown compression mode %d' % mode)
1829
1829
def _chunks(self, revs, df=None, targetsize=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    ``targetsize`` is only forwarded to ``deltautil.slicechunk`` when
    sparse reading is enabled.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.
    """
    if not revs:
        return []
    # bind frequently used attributes to local names
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self.index.entry_size
    buffer = util.buffer

    l = []
    ladd = l.append

    if not self._withsparseread:
        # a single read covers all the requested revisions
        slicedchunks = (revs,)
    else:
        slicedchunks = deltautil.slicechunk(
            self, revs, targetsize=targetsize
        )

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        # (``lastrev`` stays bound after the loop; when every revision is
        # empty it ends up being the first revision of the chunk)
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revschunk]

        decomp = self.decompress
        # self._decompressor might be None, but will not be used in that case
        def_decomp = self._decompressor
        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                # account for the index entries interleaved with the data
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            comp_mode = self.index[rev][10]
            # view on this revision's bytes within the segment just read
            c = buffer(data, chunkstart - offset, chunklength)
            if comp_mode == COMP_MODE_PLAIN:
                ladd(c)
            elif comp_mode == COMP_MODE_INLINE:
                ladd(decomp(c))
            elif comp_mode == COMP_MODE_DEFAULT:
                ladd(def_decomp(c))
            else:
                msg = b'unknown compression mode %d'
                msg %= comp_mode
                raise error.RevlogError(msg)

    return l
1897
1897
def deltaparent(self, rev):
    """Return the revision that ``rev`` is stored as a delta against.

    ``nullrev`` is returned when the recorded base is ``rev`` itself."""
    base = self.index[rev][3]
    if base == rev:
        return nullrev
    # with general delta the recorded base is the delta parent, otherwise
    # the previous revision is
    return base if self._generaldelta else rev - 1
1907
1907
def issnapshot(self, rev):
    """Tell whether ``rev`` is a snapshot."""
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev
    if hasattr(self.index, 'issnapshot'):
        # rebind the method on the instance so that later calls skip this
        # test and reach the index implementation directly
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)
    if rev == nullrev:
        return True

    entry = self.index[rev]
    base = entry[3]
    if base in (rev, nullrev):
        return True

    # walk each parent down its delta chain while it has an empty length
    effective = []
    for parent in (entry[5], entry[6]):
        while self.length(parent) == 0:
            deltabase = self.deltaparent(parent)
            if deltabase == parent:
                break
            parent = deltabase
        effective.append(parent)

    if base in effective:
        # a delta against a parent is not a snapshot
        return False
    return self.issnapshot(base)
1939
1939
def snapshotdepth(self, rev):
    """Return the number of snapshots in the chain before this one.

    ``rev`` must be a snapshot; ``error.ProgrammingError`` is raised
    otherwise.
    """
    if not self.issnapshot(rev):
        # interpolate the revision number so that the message names the
        # offending revision instead of showing a literal "%d"
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    # the depth is the length of the delta chain without ``rev`` itself
    return len(self._deltachain(rev)[0]) - 1
1945
1945
def revdiff(self, rev1, rev2):
    """Return a delta turning ``rev1`` into ``rev2``, computing it if needed.

    The delta is in binary form and meant to be written to revlog data
    directly, which is why raw revision data is used.
    """
    stored_against_rev1 = rev1 != nullrev and self.deltaparent(rev2) == rev1
    if stored_against_rev1:
        # the stored chunk already is the requested delta
        return bytes(self._chunk(rev2))

    oldtext = self.rawdata(rev1)
    newtext = self.rawdata(rev2)
    return mdiff.textdiff(oldtext, newtext)
1956
1956
def revision(self, nodeorrev):
    """Return the uncompressed revision designated by a node or by a
    revision number.
    """
    text = self._revisiondata(nodeorrev)
    return text
1962
1962
def sidedata(self, nodeorrev):
    """Return the extra data attached to a changeset outside of its hash.

    A dictionary is currently returned. A more advanced mapping object
    will likely replace it in the future for a more efficient/lazy code.
    """
    # accept either a revision number or a node
    rev = nodeorrev if isinstance(nodeorrev, int) else self.rev(nodeorrev)
    return self._sidedata(rev)
1976
1976
def _revisiondata(self, nodeorrev, _df=None, raw=False):
    """Return the text of a revision designated by a node or a rev number.

    ``_df`` is forwarded to ``self._rawtext``. When ``raw`` is true the
    raw text is returned and the flags go through
    ``flagutil.processflagsraw``; otherwise
    ``flagutil.processflagsread`` produces the returned text.
    """
    # deal with <nodeorrev> argument type
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        rev = None

    # fast path the special `nullid` rev
    if node == self.nullid:
        return b""

    # ``rawtext`` is the text as stored inside the revlog. Might be the
    # revision or might need to be processed to retrieve the revision.
    rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

    if raw and validated:
        # if we don't want to process the raw text and that raw
        # text is cached, we can exit early.
        return rawtext
    if rev is None:
        rev = self.rev(node)
    # the revlog's flag for this revision
    # (usually alter its state or content)
    flags = self.flags(rev)

    if validated and flags == REVIDX_DEFAULT_FLAGS:
        # no extra flags set, no flag processor runs, text = rawtext
        return rawtext

    if raw:
        validatehash = flagutil.processflagsraw(self, rawtext, flags)
        text = rawtext
    else:
        r = flagutil.processflagsread(self, rawtext, flags)
        text, validatehash = r
    if validatehash:
        self.checkhash(text, node, rev=rev)
    if not validated:
        # the raw text did not come from the cache: remember it now that
        # the checks above went through
        self._revisioncache = (node, rev, rawtext)

    return text
2020
2020
def _rawtext(self, node, rev, _df=None):
    """return the possibly unvalidated rawtext for a revision

    returns (rev, rawtext, validated)

    ``validated`` is True only when the text comes straight from
    ``self._revisioncache``. In that case ``rev`` is returned as it was
    passed in, so it may still be None.
    """

    # revision in the cache (could be useful to apply delta)
    cachedrev = None
    # An intermediate text to apply deltas to
    basetext = None

    # Check if we have the entry in cache
    # The cache entry looks like (node, rev, rawtext)
    if self._revisioncache:
        if self._revisioncache[0] == node:
            return (rev, self._revisioncache[2], True)
        cachedrev = self._revisioncache[1]

    if rev is None:
        rev = self.rev(node)

    # the chain computation is told to stop at the cached revision
    chain, stopped = self._deltachain(rev, stoprev=cachedrev)
    if stopped:
        # the cached text is the base the remaining deltas apply to
        basetext = self._revisioncache[2]

    # drop cache to save memory, the caller is expected to
    # update self._revisioncache after validating the text
    self._revisioncache = None

    targetsize = None
    rawsize = self.index[rev][2]
    if 0 <= rawsize:
        targetsize = 4 * rawsize

    bins = self._chunks(chain, df=_df, targetsize=targetsize)
    if basetext is None:
        # no cached base: the first chunk of the chain is the base text
        basetext = bytes(bins[0])
        bins = bins[1:]

    rawtext = mdiff.patches(basetext, bins)
    del basetext  # let us have a chance to free memory early
    return (rev, rawtext, False)
2063
2063
2064 def _sidedata(self, rev):
2064 def _sidedata(self, rev):
2065 """Return the sidedata for a given revision number."""
2065 """Return the sidedata for a given revision number."""
2066 index_entry = self.index[rev]
2066 index_entry = self.index[rev]
2067 sidedata_offset = index_entry[8]
2067 sidedata_offset = index_entry[8]
2068 sidedata_size = index_entry[9]
2068 sidedata_size = index_entry[9]
2069
2069
2070 if self._inline:
2070 if self._inline:
2071 sidedata_offset += self.index.entry_size * (1 + rev)
2071 sidedata_offset += self.index.entry_size * (1 + rev)
2072 if sidedata_size == 0:
2072 if sidedata_size == 0:
2073 return {}
2073 return {}
2074
2074
2075 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2075 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2076 filename = self._sidedatafile
2076 filename = self._sidedatafile
2077 end = self._docket.sidedata_end
2077 end = self._docket.sidedata_end
2078 offset = sidedata_offset
2078 offset = sidedata_offset
2079 length = sidedata_size
2079 length = sidedata_size
2080 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2080 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2081 raise error.RevlogError(m)
2081 raise error.RevlogError(m)
2082
2082
2083 comp_segment = self._segmentfile_sidedata.read_chunk(
2083 comp_segment = self._segmentfile_sidedata.read_chunk(
2084 sidedata_offset, sidedata_size
2084 sidedata_offset, sidedata_size
2085 )
2085 )
2086
2086
2087 comp = self.index[rev][11]
2087 comp = self.index[rev][11]
2088 if comp == COMP_MODE_PLAIN:
2088 if comp == COMP_MODE_PLAIN:
2089 segment = comp_segment
2089 segment = comp_segment
2090 elif comp == COMP_MODE_DEFAULT:
2090 elif comp == COMP_MODE_DEFAULT:
2091 segment = self._decompressor(comp_segment)
2091 segment = self._decompressor(comp_segment)
2092 elif comp == COMP_MODE_INLINE:
2092 elif comp == COMP_MODE_INLINE:
2093 segment = self.decompress(comp_segment)
2093 segment = self.decompress(comp_segment)
2094 else:
2094 else:
2095 msg = b'unknown compression mode %d'
2095 msg = b'unknown compression mode %d'
2096 msg %= comp
2096 msg %= comp
2097 raise error.RevlogError(msg)
2097 raise error.RevlogError(msg)
2098
2098
2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2100 return sidedata
2100 return sidedata
2101
2101
def rawdata(self, nodeorrev):
    """Return the uncompressed raw data of a given node or revision number.

    This delegates to ``_revisiondata`` with ``raw=True``.
    """
    raw = self._revisiondata(nodeorrev, raw=True)
    return raw
2105
2105
def hash(self, text, p1, p2):
    """Compute the node hash for ``text`` with parents ``p1`` and ``p2``.

    Kept as a method so that subclasses can substitute another hashing
    scheme when they need one.
    """
    node = storageutil.hashrevisionsha1(text, p1, p2)
    return node
2113
2113
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Verify that ``text`` hashes to ``node``.

    When neither parent is supplied, the parents are looked up from
    ``node``.  On mismatch a ``RevlogError`` is raised, or a
    ``CensoredNodeError`` when the revlog is censorable and the text is a
    censored one.

    Kept as a method so that subclasses can extend the hash mismatch
    behavior as needed.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node == self.hash(text, p1, p2):
            return

        # Clear the revision cache on hash failure. The revision cache
        # only stores the raw revision, so clearing it means the next
        # raw access for this node misses the cache. That case should be
        # rare, and teaching the cache about the hash verification state
        # would be extra work.
        cached = self._revisioncache
        if cached and cached[0] == node:
            self._revisioncache = None

        # prefer the revision number in the message, fall back to the node
        if rev is not None:
            identifier = rev
        else:
            identifier = templatefilters.short(hex(node))
        raise error.RevlogError(
            _(b"integrity check failed on %s:%s")
            % (self.display_id, pycompat.bytestr(identifier))
        )
    except error.RevlogError:
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.display_id, node, text)
        raise
2144
2144
2145 @property
2145 @property
2146 def _split_index_file(self):
2146 def _split_index_file(self):
2147 """the path where to expect the index of an ongoing splitting operation
2147 """the path where to expect the index of an ongoing splitting operation
2148
2148
2149 The file will only exist if a splitting operation is in progress, but
2149 The file will only exist if a splitting operation is in progress, but
2150 it is always expected at the same location."""
2150 it is always expected at the same location."""
2151 parts = self.radix.split(b'/')
2151 parts = self.radix.split(b'/')
2152 if len(parts) > 1:
2152 if len(parts) > 1:
2153 # adds a '-s' prefix to the ``data/` or `meta/` base
2153 # adds a '-s' prefix to the ``data/` or `meta/` base
2154 head = parts[0] + b'-s'
2154 head = parts[0] + b'-s'
2155 mids = parts[1:-1]
2155 mids = parts[1:-1]
2156 tail = parts[-1] + b'.i'
2156 tail = parts[-1] + b'.i'
2157 pieces = [head] + mids + [tail]
2157 pieces = [head] + mids + [tail]
2158 return b'/'.join(pieces)
2158 return b'/'.join(pieces)
2159 else:
2159 else:
2160 # the revlog is stored at the root of the store (changelog or
2160 # the revlog is stored at the root of the store (changelog or
2161 # manifest), no risk of collision.
2161 # manifest), no risk of collision.
2162 return self.radix + b'.i.s'
2162 return self.radix + b'.i.s'
2163
2163
2164 def _enforceinlinesize(self, tr, side_write=True):
2164 def _enforceinlinesize(self, tr, side_write=True):
2165 """Check if the revlog is too big for inline and convert if so.
2165 """Check if the revlog is too big for inline and convert if so.
2166
2166
2167 This should be called after revisions are added to the revlog. If the
2167 This should be called after revisions are added to the revlog. If the
2168 revlog has grown too large to be an inline revlog, it will convert it
2168 revlog has grown too large to be an inline revlog, it will convert it
2169 to use multiple index and data files.
2169 to use multiple index and data files.
2170 """
2170 """
2171 tiprev = len(self) - 1
2171 tiprev = len(self) - 1
2172 total_size = self.start(tiprev) + self.length(tiprev)
2172 total_size = self.start(tiprev) + self.length(tiprev)
2173 if not self._inline or total_size < _maxinline:
2173 if not self._inline or total_size < _maxinline:
2174 return
2174 return
2175
2175
2176 troffset = tr.findoffset(self._indexfile)
2176 troffset = tr.findoffset(self._indexfile)
2177 if troffset is None:
2177 if troffset is None:
2178 raise error.RevlogError(
2178 raise error.RevlogError(
2179 _(b"%s not found in the transaction") % self._indexfile
2179 _(b"%s not found in the transaction") % self._indexfile
2180 )
2180 )
2181 if troffset:
2181 if troffset:
2182 tr.addbackup(self._indexfile, for_offset=True)
2182 tr.addbackup(self._indexfile, for_offset=True)
2183 tr.add(self._datafile, 0)
2183 tr.add(self._datafile, 0)
2184
2184
2185 existing_handles = False
2185 existing_handles = False
2186 if self._writinghandles is not None:
2186 if self._writinghandles is not None:
2187 existing_handles = True
2187 existing_handles = True
2188 fp = self._writinghandles[0]
2188 fp = self._writinghandles[0]
2189 fp.flush()
2189 fp.flush()
2190 fp.close()
2190 fp.close()
2191 # We can't use the cached file handle after close(). So prevent
2191 # We can't use the cached file handle after close(). So prevent
2192 # its usage.
2192 # its usage.
2193 self._writinghandles = None
2193 self._writinghandles = None
2194 self._segmentfile.writing_handle = None
2194 self._segmentfile.writing_handle = None
2195 # No need to deal with sidedata writing handle as it is only
2195 # No need to deal with sidedata writing handle as it is only
2196 # relevant with revlog-v2 which is never inline, not reaching
2196 # relevant with revlog-v2 which is never inline, not reaching
2197 # this code
2197 # this code
2198 if side_write:
2198 if side_write:
2199 old_index_file_path = self._indexfile
2199 old_index_file_path = self._indexfile
2200 new_index_file_path = self._split_index_file
2200 new_index_file_path = self._split_index_file
2201 opener = self.opener
2201 opener = self.opener
2202 weak_self = weakref.ref(self)
2202 weak_self = weakref.ref(self)
2203
2203
2204 # the "split" index replace the real index when the transaction is finalized
2204 # the "split" index replace the real index when the transaction is finalized
2205 def finalize_callback(tr):
2205 def finalize_callback(tr):
2206 opener.rename(
2206 opener.rename(
2207 new_index_file_path,
2207 new_index_file_path,
2208 old_index_file_path,
2208 old_index_file_path,
2209 checkambig=True,
2209 checkambig=True,
2210 )
2210 )
2211 maybe_self = weak_self()
2211 maybe_self = weak_self()
2212 if maybe_self is not None:
2212 if maybe_self is not None:
2213 maybe_self._indexfile = old_index_file_path
2213 maybe_self._indexfile = old_index_file_path
2214
2214
2215 def abort_callback(tr):
2215 def abort_callback(tr):
2216 maybe_self = weak_self()
2216 maybe_self = weak_self()
2217 if maybe_self is not None:
2217 if maybe_self is not None:
2218 maybe_self._indexfile = old_index_file_path
2218 maybe_self._indexfile = old_index_file_path
2219
2219
2220 tr.registertmp(new_index_file_path)
2220 tr.registertmp(new_index_file_path)
2221 if self.target[1] is not None:
2221 if self.target[1] is not None:
2222 callback_id = b'000-revlog-split-%d-%s' % self.target
2222 callback_id = b'000-revlog-split-%d-%s' % self.target
2223 else:
2223 else:
2224 callback_id = b'000-revlog-split-%d' % self.target[0]
2224 callback_id = b'000-revlog-split-%d' % self.target[0]
2225 tr.addfinalize(callback_id, finalize_callback)
2225 tr.addfinalize(callback_id, finalize_callback)
2226 tr.addabort(callback_id, abort_callback)
2226 tr.addabort(callback_id, abort_callback)
2227
2227
2228 new_dfh = self._datafp(b'w+')
2228 new_dfh = self._datafp(b'w+')
2229 new_dfh.truncate(0) # drop any potentially existing data
2229 new_dfh.truncate(0) # drop any potentially existing data
2230 try:
2230 try:
2231 with self._indexfp() as read_ifh:
2231 with self.reading():
2232 for r in self:
2232 for r in self:
2233 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2233 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2234 new_dfh.flush()
2234 new_dfh.flush()
2235
2235
2236 if side_write:
2236 if side_write:
2237 self._indexfile = new_index_file_path
2237 self._indexfile = new_index_file_path
2238 with self.__index_new_fp() as fp:
2238 with self.__index_new_fp() as fp:
2239 self._format_flags &= ~FLAG_INLINE_DATA
2239 self._format_flags &= ~FLAG_INLINE_DATA
2240 self._inline = False
2240 self._inline = False
2241 for i in self:
2241 for i in self:
2242 e = self.index.entry_binary(i)
2242 e = self.index.entry_binary(i)
2243 if i == 0 and self._docket is None:
2243 if i == 0 and self._docket is None:
2244 header = self._format_flags | self._format_version
2244 header = self._format_flags | self._format_version
2245 header = self.index.pack_header(header)
2245 header = self.index.pack_header(header)
2246 e = header + e
2246 e = header + e
2247 fp.write(e)
2247 fp.write(e)
2248 if self._docket is not None:
2248 if self._docket is not None:
2249 self._docket.index_end = fp.tell()
2249 self._docket.index_end = fp.tell()
2250
2250
2251 # If we don't use side-write, the temp file replace the real
2251 # If we don't use side-write, the temp file replace the real
2252 # index when we exit the context manager
2252 # index when we exit the context manager
2253
2253
2254 nodemaputil.setup_persistent_nodemap(tr, self)
2254 nodemaputil.setup_persistent_nodemap(tr, self)
2255 self._segmentfile = randomaccessfile.randomaccessfile(
2255 self._segmentfile = randomaccessfile.randomaccessfile(
2256 self.opener,
2256 self.opener,
2257 self._datafile,
2257 self._datafile,
2258 self._chunkcachesize,
2258 self._chunkcachesize,
2259 )
2259 )
2260
2260
2261 if existing_handles:
2261 if existing_handles:
2262 # switched from inline to conventional reopen the index
2262 # switched from inline to conventional reopen the index
2263 ifh = self.__index_write_fp()
2263 ifh = self.__index_write_fp()
2264 self._writinghandles = (ifh, new_dfh, None)
2264 self._writinghandles = (ifh, new_dfh, None)
2265 self._segmentfile.writing_handle = new_dfh
2265 self._segmentfile.writing_handle = new_dfh
2266 new_dfh = None
2266 new_dfh = None
2267 # No need to deal with sidedata writing handle as it is only
2267 # No need to deal with sidedata writing handle as it is only
2268 # relevant with revlog-v2 which is never inline, not reaching
2268 # relevant with revlog-v2 which is never inline, not reaching
2269 # this code
2269 # this code
2270 finally:
2270 finally:
2271 if new_dfh is not None:
2271 if new_dfh is not None:
2272 new_dfh.close()
2272 new_dfh.close()
2273
2273
def _nodeduplicatecallback(self, transaction, node):
    """Called when trying to add a node that is already stored.

    This implementation does nothing.
    """
2276
2276
@contextlib.contextmanager
def reading(self):
    """Context manager that keeps data and sidedata files open for reading

    When the index is empty, no file is opened at all.
    """
    if len(self.index) == 0:
        yield  # nothing to be read
        return
    # the data file context is entered first and exited last
    with self._segmentfile.reading(), self._segmentfile_sidedata.reading():
        yield
2286
2286
2287 @contextlib.contextmanager
2287 @contextlib.contextmanager
2288 def _writing(self, transaction):
2288 def _writing(self, transaction):
2289 if self._trypending:
2289 if self._trypending:
2290 msg = b'try to write in a `trypending` revlog: %s'
2290 msg = b'try to write in a `trypending` revlog: %s'
2291 msg %= self.display_id
2291 msg %= self.display_id
2292 raise error.ProgrammingError(msg)
2292 raise error.ProgrammingError(msg)
2293 if self._writinghandles is not None:
2293 if self._writinghandles is not None:
2294 yield
2294 yield
2295 else:
2295 else:
2296 ifh = dfh = sdfh = None
2296 ifh = dfh = sdfh = None
2297 try:
2297 try:
2298 r = len(self)
2298 r = len(self)
2299 # opening the data file.
2299 # opening the data file.
2300 dsize = 0
2300 dsize = 0
2301 if r:
2301 if r:
2302 dsize = self.end(r - 1)
2302 dsize = self.end(r - 1)
2303 dfh = None
2303 dfh = None
2304 if not self._inline:
2304 if not self._inline:
2305 try:
2305 try:
2306 dfh = self._datafp(b"r+")
2306 dfh = self._datafp(b"r+")
2307 if self._docket is None:
2307 if self._docket is None:
2308 dfh.seek(0, os.SEEK_END)
2308 dfh.seek(0, os.SEEK_END)
2309 else:
2309 else:
2310 dfh.seek(self._docket.data_end, os.SEEK_SET)
2310 dfh.seek(self._docket.data_end, os.SEEK_SET)
2311 except FileNotFoundError:
2311 except FileNotFoundError:
2312 dfh = self._datafp(b"w+")
2312 dfh = self._datafp(b"w+")
2313 transaction.add(self._datafile, dsize)
2313 transaction.add(self._datafile, dsize)
2314 if self._sidedatafile is not None:
2314 if self._sidedatafile is not None:
2315 # revlog-v2 does not inline, help Pytype
2315 # revlog-v2 does not inline, help Pytype
2316 assert dfh is not None
2316 assert dfh is not None
2317 try:
2317 try:
2318 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2318 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2319 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2319 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2320 except FileNotFoundError:
2320 except FileNotFoundError:
2321 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2321 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2322 transaction.add(
2322 transaction.add(
2323 self._sidedatafile, self._docket.sidedata_end
2323 self._sidedatafile, self._docket.sidedata_end
2324 )
2324 )
2325
2325
2326 # opening the index file.
2326 # opening the index file.
2327 isize = r * self.index.entry_size
2327 isize = r * self.index.entry_size
2328 ifh = self.__index_write_fp()
2328 ifh = self.__index_write_fp()
2329 if self._inline:
2329 if self._inline:
2330 transaction.add(self._indexfile, dsize + isize)
2330 transaction.add(self._indexfile, dsize + isize)
2331 else:
2331 else:
2332 transaction.add(self._indexfile, isize)
2332 transaction.add(self._indexfile, isize)
2333 # exposing all file handle for writing.
2333 # exposing all file handle for writing.
2334 self._writinghandles = (ifh, dfh, sdfh)
2334 self._writinghandles = (ifh, dfh, sdfh)
2335 self._segmentfile.writing_handle = ifh if self._inline else dfh
2335 self._segmentfile.writing_handle = ifh if self._inline else dfh
2336 self._segmentfile_sidedata.writing_handle = sdfh
2336 self._segmentfile_sidedata.writing_handle = sdfh
2337 yield
2337 yield
2338 if self._docket is not None:
2338 if self._docket is not None:
2339 self._write_docket(transaction)
2339 self._write_docket(transaction)
2340 finally:
2340 finally:
2341 self._writinghandles = None
2341 self._writinghandles = None
2342 self._segmentfile.writing_handle = None
2342 self._segmentfile.writing_handle = None
2343 self._segmentfile_sidedata.writing_handle = None
2343 self._segmentfile_sidedata.writing_handle = None
2344 if dfh is not None:
2344 if dfh is not None:
2345 dfh.close()
2345 dfh.close()
2346 if sdfh is not None:
2346 if sdfh is not None:
2347 sdfh.close()
2347 sdfh.close()
2348 # closing the index file last to avoid exposing referent to
2348 # closing the index file last to avoid exposing referent to
2349 # potential unflushed data content.
2349 # potential unflushed data content.
2350 if ifh is not None:
2350 if ifh is not None:
2351 ifh.close()
2351 ifh.close()
2352
2352
def _write_docket(self, transaction):
    """Write the current docket to disk.

    This exists as a method to help changelog implement its transaction
    logic.

    We could also imagine using the same transaction logic for all revlogs
    since dockets are cheap."""
    self._docket.write(transaction)
2361
2361
def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - optional sidedata for the revision (defaults to an empty
        dict); only accepted when the revlog has sidedata support

    Returns the revision number, which is the existing one when the node
    is already stored.
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self.display_id
        )

    if sidedata is None:
        sidedata = {}
    if sidedata and not self.hassidedata:
        raise error.ProgrammingError(
            _(b"trying to add sidedata to a revlog who don't support them")
        )

    if flags:
        # with flags set, the node is computed from the unprocessed text
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    rawsize = len(rawtext)
    if rawsize > _maxentrysize:
        raise error.RevlogError(
            _(b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.display_id, rawsize)
        )

    node = node or self.hash(rawtext, p1, p2)
    known_rev = self.index.get_rev(node)
    if known_rev is not None:
        # already stored, nothing to add
        return known_rev

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
        sidedata=sidedata,
    )
2439
2439
def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
    sidedata=None,
):
    """add a raw revision with known flags, node and parents

    Useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).

    The revision is added inside a ``_writing`` context and the result of
    ``_addrevision`` is returned.
    """
    with self._writing(transaction):
        added = self._addrevision(
            node,
            rawtext,
            transaction,
            link,
            p1,
            p2,
            flags,
            cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
        return added
2470
2470
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, payload)`` pair of bytes.  The header is ``b'u'``
    when the data is stored uncompressed and does not start with a NUL
    byte, and empty otherwise.
    """
    if not data:
        return b'', data

    packed = self._compressor.compress(data)
    if packed:
        # The revlog compressor added the header in the returned data.
        return b'', packed

    # stored as-is: data starting with NUL needs no marker
    header = b'' if data[0:1] == b'\0' else b'u'
    return header, data
2485
2485
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    Empty chunks are returned unchanged.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely be at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for the "decompress compressed
    # data when encoded with common and officially supported compression
    # engines" case over "raw data" and "data encoded by less common or
    # non-official compression engines." That is why the inline checks
    # come first, followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    header = data[0:1]

    if header == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    if header == b'\0':
        return data
    if header == b'u':
        # drop the one-byte marker without copying the payload
        return util.buffer(data, 1)

    engine = self._get_decompressor(header)
    return engine.decompress(data)
2535
2535
def _addrevision(
    self,
    node,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    flags,
    cachedelta,
    alwayscache=False,
    deltacomputer=None,
    sidedata=None,
):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.

    Returns the revision number given to the new entry, i.e. ``len(self)``
    as it was before the entry was appended to the index.

    Raises ``error.RevlogError`` when ``node`` is the null id or one of the
    working-directory ids, and ``error.ProgrammingError`` when no writing
    handles are open (``self._writinghandles`` is None).
    """
    if node == self.nullid:
        raise error.RevlogError(
            _(b"%s: attempt to add null revision") % self.display_id
        )
    if (
        node == self.nodeconstants.wdirid
        or node in self.nodeconstants.wdirfilenodeids
    ):
        raise error.RevlogError(
            _(b"%s: attempt to add wdir revision") % self.display_id
        )
    if self._writinghandles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)

    # One-element list handed to `revisioninfo`; slot 0 is read back after
    # the entry has been written (see the cache update at the end).
    btext = [rawtext]

    # `curr` is the revision number the new entry will receive.
    curr = len(self)
    prev = curr - 1

    offset = self._get_data_offset(prev)

    if self._concurrencychecker:
        ifh, dfh, sdfh = self._writinghandles
        # XXX no checking for the sidedata file
        if self._inline:
            # offset is "as if" it were in the .d file, so we need to add on
            # the size of the entry metadata.
            self._concurrencychecker(
                ifh, self._indexfile, offset + curr * self.index.entry_size
            )
        else:
            # Entries in the .i are a consistent size.
            self._concurrencychecker(
                ifh, self._indexfile, curr * self.index.entry_size
            )
            self._concurrencychecker(dfh, self._datafile, offset)

    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is None:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(
            revlog.size(self, cachedelta[0]), cachedelta[1]
        )
    else:
        textlen = len(rawtext)

    if deltacomputer is None:
        # Only wire up debug output when delta debugging is enabled.
        write_debug = None
        if self._debug_delta:
            write_debug = transaction._report
        deltacomputer = deltautil.deltacomputer(
            self, write_debug=write_debug
        )

    if cachedelta is not None and len(cachedelta) == 2:
        # If the cached delta has no information about how it should be
        # reused, add the default reuse instruction according to the
        # revlog's configuration.
        if self._generaldelta and self._lazydeltabase:
            delta_base_reuse = DELTA_BASE_REUSE_TRY
        else:
            delta_base_reuse = DELTA_BASE_REUSE_NO
        cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

    revinfo = revlogutils.revisioninfo(
        node,
        p1,
        p2,
        btext,
        textlen,
        cachedelta,
        flags,
    )

    deltainfo = deltacomputer.finddeltainfo(revinfo)

    # Without a docket the data compression mode stays COMP_MODE_INLINE.
    # With one, `delta_compression` returns the mode together with the
    # deltainfo that will actually be stored.
    compression_mode = COMP_MODE_INLINE
    if self._docket is not None:
        default_comp = self._docket.default_compression_header
        r = deltautil.delta_compression(default_comp, deltainfo)
        compression_mode, deltainfo = r

    sidedata_compression_mode = COMP_MODE_INLINE
    if sidedata and self.hassidedata:
        # Start from the plain serialization and switch to the compressed
        # form only if it qualifies below.
        sidedata_compression_mode = COMP_MODE_PLAIN
        serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
        sidedata_offset = self._docket.sidedata_end
        h, comp_sidedata = self.compress(serialized_sidedata)
        # Keep the compressed payload only when compress() did not return
        # the b'u' marker, the payload does not start with a NUL byte, and
        # it is strictly smaller than the plain serialization.
        if (
            h != b'u'
            and comp_sidedata[0:1] != b'\0'
            and len(comp_sidedata) < len(serialized_sidedata)
        ):
            assert not h
            if (
                comp_sidedata[0:1]
                == self._docket.default_compression_header
            ):
                sidedata_compression_mode = COMP_MODE_DEFAULT
                serialized_sidedata = comp_sidedata
            else:
                sidedata_compression_mode = COMP_MODE_INLINE
                serialized_sidedata = comp_sidedata
    else:
        serialized_sidedata = b""
        # Don't store the offset if the sidedata is empty, that way
        # we can easily detect empty sidedata and they will be no different
        # than ones we manually add.
        sidedata_offset = 0

    # The rank stays RANK_UNKNOWN unless this revlog computes ranks.
    rank = RANK_UNKNOWN
    if self._compute_rank:
        if (p1r, p2r) == (nullrev, nullrev):
            # no parents at all
            rank = 1
        elif p1r != nullrev and p2r == nullrev:
            rank = 1 + self.fast_rank(p1r)
        elif p1r == nullrev and p2r != nullrev:
            rank = 1 + self.fast_rank(p2r)
        else:  # merge node
            if rustdagop is not None and self.index.rust_ext_compat:
                rank = rustdagop.rank(self.index, p1r, p2r)
            else:
                # 1 + rank of the higher-numbered parent, plus one for
                # every revision yielded by findmissingrevs([pmax], [pmin]).
                pmin, pmax = sorted((p1r, p2r))
                rank = 1 + self.fast_rank(pmax)
                rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

    e = revlogutils.entry(
        flags=flags,
        data_offset=offset,
        data_compressed_length=deltainfo.deltalen,
        data_uncompressed_length=textlen,
        data_compression_mode=compression_mode,
        data_delta_base=deltainfo.base,
        link_rev=link,
        parent_rev_1=p1r,
        parent_rev_2=p2r,
        node_id=node,
        sidedata_offset=sidedata_offset,
        sidedata_compressed_length=len(serialized_sidedata),
        sidedata_compression_mode=sidedata_compression_mode,
        rank=rank,
    )

    self.index.append(e)
    entry = self.index.entry_binary(curr)
    if curr == 0 and self._docket is None:
        # First entry of a docket-less revlog: the packed format header
        # (flags | version) is prepended to the binary index entry.
        header = self._format_flags | self._format_version
        header = self.index.pack_header(header)
        entry = header + entry
    self._writeentry(
        transaction,
        entry,
        deltainfo.data,
        link,
        offset,
        serialized_sidedata,
        sidedata_offset,
    )

    # NOTE(review): btext[0] is re-read here, presumably because the delta
    # computer may have filled it in while searching for a delta -- confirm
    # against deltautil.
    rawtext = btext[0]

    if alwayscache and rawtext is None:
        rawtext = deltacomputer.buildtext(revinfo)

    if type(rawtext) == bytes:  # only accept immutable objects
        self._revisioncache = (node, curr, rawtext)
    self._chainbasecache[curr] = deltainfo.chainbase
    return curr
2737
2737
def _get_data_offset(self, prev):
    """Return the offset at which the next revision's data will be written.

    When the revlog has a docket, the docket's recorded ``data_end`` is
    used.  This is needed because sidedata can be rewritten to the end of
    the data file within a transaction: rev `n` may have no sidedata while
    rev `n - 1` does, so `n - 1`'s sidedata can end up written after `n`'s
    data.  Without a docket, the offset is simply the end of revision
    ``prev``, which is available in O(1).

    TODO cache this in a docket file before getting out of experimental.
    """
    docket = self._docket
    if docket is not None:
        return docket.data_end
    return self.end(prev)
2751
2751
def _writeentry(
    self, transaction, entry, data, link, offset, sidedata, sidedata_offset
):
    """Write one revision (index entry, data, sidedata) to the open handles.

    The open writing handles are first positioned at their append point,
    the pre-write offsets are registered with ``transaction``, and the
    payloads are then written.  When a docket is in use, its ``index_end``,
    ``data_end`` and ``sidedata_end`` are refreshed from the handles'
    positions afterwards.  Finally
    ``nodemaputil.setup_persistent_nodemap`` is called.

    ``entry`` is the binary index entry.  ``data`` is indexed as a pair:
    ``data[0]`` then ``data[1]`` are written one after the other.
    ``offset`` and ``sidedata_offset`` are the offsets registered with the
    transaction for the data and sidedata files.  ``link`` is accepted but
    not used in this method body.

    Raises ``error.ProgrammingError`` if no writing handles are open.
    """
    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms, Python or the platform itself can be buggy. Some versions
    # of Solaris have been observed to not append at the end of the file
    # if the file was seeked to before the end. See issue4943 for more.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3. However, because
    # the file handle is reused for reads and may be seeked there, we need
    # to be careful before changing this.
    if self._writinghandles is None:
        msg = b'adding revision outside `revlog._writing` context'
        raise error.ProgrammingError(msg)
    ifh, dfh, sdfh = self._writinghandles
    # Without a docket, append at the physical end of file; with one, the
    # docket's recorded end offsets are the append positions.
    if self._docket is None:
        ifh.seek(0, os.SEEK_END)
    else:
        ifh.seek(self._docket.index_end, os.SEEK_SET)
    if dfh:
        if self._docket is None:
            dfh.seek(0, os.SEEK_END)
        else:
            dfh.seek(self._docket.data_end, os.SEEK_SET)
    if sdfh:
        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

    # Revision number of the last index entry.
    curr = len(self) - 1
    if not self._inline:
        # Separate files: register each file's pre-write offset with the
        # transaction, then write data, sidedata and finally the index.
        transaction.add(self._datafile, offset)
        if self._sidedatafile:
            transaction.add(self._sidedatafile, sidedata_offset)
        transaction.add(self._indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        if sidedata:
            sdfh.write(sidedata)
        ifh.write(entry)
    else:
        # Inline: data lives in the index file, so the index offset is the
        # data offset plus the size of the `curr` entries before it.
        offset += curr * self.index.entry_size
        transaction.add(self._indexfile, offset)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        assert not sidedata
        self._enforceinlinesize(transaction)
    if self._docket is not None:
        # revlog-v2 always has 3 writing handles, help Pytype
        # (handles are re-read from self._writinghandles rather than
        # reusing the locals unpacked above)
        wh1 = self._writinghandles[0]
        wh2 = self._writinghandles[1]
        wh3 = self._writinghandles[2]
        assert wh1 is not None
        assert wh2 is not None
        assert wh3 is not None
        self._docket.index_end = wh1.tell()
        self._docket.data_end = wh2.tell()
        self._docket.sidedata_end = wh3.tell()

    nodemaputil.setup_persistent_nodemap(transaction, self)
2816
2816
def addgroup(
    self,
    deltas,
    linkmapper,
    transaction,
    alwayscache=False,
    addrevisioncb=None,
    duplicaterevisioncb=None,
    debug_info=None,
    delta_base_reuse_policy=None,
):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    Each item of ``deltas`` is unpacked as an 8-tuple
    ``(node, p1, p2, linknode, deltabase, delta, flags, sidedata)``;
    ``linkmapper(linknode)`` gives the link revision to store.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the revision number that was added.

    If ``duplicaterevisioncb`` is defined, it will be called with this
    revlog and the existing revision number whenever a node of the group
    is already present in the index.

    If ``delta_base_reuse_policy`` is None, it is derived from the revlog
    configuration (``_generaldelta`` and ``_lazydeltabase``).

    Returns True if the group contained at least one delta (whether it was
    added or was a duplicate), False if ``deltas`` was empty.

    Raises ``error.ProgrammingError`` on nested calls,
    ``error.LookupError`` when a parent or the delta base is unknown, and
    ``error.CensoredBaseError`` when a delta against a censored base is
    not a full replacement.
    """

    if self._adding_group:
        raise error.ProgrammingError(b'cannot nest addgroup() calls')

    # read the default delta-base reuse policy from revlog config if the
    # group did not specify one.
    if delta_base_reuse_policy is None:
        if self._generaldelta and self._lazydeltabase:
            delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
        else:
            delta_base_reuse_policy = DELTA_BASE_REUSE_NO

    self._adding_group = True
    empty = True
    try:
        with self._writing(transaction):
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            # A single delta computer is shared by every revision of the
            # group.
            deltacomputer = deltautil.deltacomputer(
                self,
                write_debug=write_debug,
                debug_info=debug_info,
            )
            # loop through our set of deltas
            for data in deltas:
                (
                    node,
                    p1,
                    p2,
                    linknode,
                    deltabase,
                    delta,
                    flags,
                    sidedata,
                ) = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    # a duplicate still counts as a non-empty group
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(
                        oldlen, newlen
                    ):
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                # Only peek at the delta when no flags were supplied.
                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta, delta_base_reuse_policy),
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False
    finally:
        # always clear the re-entrancy guard, even on error
        self._adding_group = False
    return not empty
2942
2942
def iscensored(self, rev):
    """Tell whether revision ``rev`` carries the censored flag.

    Always False for revlogs that are not censorable; otherwise the result
    of masking the revision's flags with ``REVIDX_ISCENSORED``.
    """
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED
    return False
2949
2949
def _peek_iscensored(self, baserev, delta):
    """Cheaply test whether applying ``delta`` yields a censored revision.

    Always False for revlogs that are not censorable; otherwise the check
    is delegated to ``storageutil.deltaiscensored``.
    """
    if self._censorable:
        return storageutil.deltaiscensored(delta, baserev, self.rawsize)
    return False
2956
2956
def getstrippoint(self, minlink):
    """Locate the lowest revision that a strip at ``minlink`` must remove.

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    tiprev = len(self) - 1
    heads = self.headrevs()
    return storageutil.resolvestripinfo(
        minlink, tiprev, heads, self.linkrev, self.parentrevs
    )
2970
2970
def strip(self, minlink, transaction):
    """Truncate the revlog at the first revision whose linkrev >= minlink.

    Called when revision ``minlink`` and its descendants are being stripped
    from the repository.  Every revision with linkrev >= minlink has to go,
    because the matching changelog revisions get renumbered by the strip.

    The revlog is cut at the first such revision; the caller is trusted to
    have saved any revisions that must survive and to re-add them after
    the truncation.
    """
    if not len(self):
        return

    rev, _brokenrevs = self.getstrippoint(minlink)
    if rev == len(self):
        # nothing at or above the strip point
        return

    # first truncate the files on disk
    data_end = self.start(rev)
    if self._inline:
        # data is interleaved with the index entries
        end = data_end + (rev * self.index.entry_size)
    else:
        transaction.add(self._datafile, data_end)
        end = rev * self.index.entry_size

    if self._sidedatafile:
        sidedata_end = self.sidedata_cut_off(rev)
        transaction.add(self._sidedatafile, sidedata_end)

    transaction.add(self._indexfile, end)
    docket = self._docket
    if docket is not None:
        # XXX we could, leverage the docket while stripping. However it is
        # not powerfull enough at the time of this comment
        docket.index_end = end
        docket.data_end = data_end
        docket.sidedata_end = sidedata_end
        docket.write(transaction, stripping=True)

    # then reset internal state in memory to forget those revisions
    self._revisioncache = None
    self._chaininfocache = util.lrucachedict(500)
    self._segmentfile.clear_cache()
    self._segmentfile_sidedata.clear_cache()

    del self.index[rev:-1]
3020
3020
def checksize(self):
    """Check size of index and data files

    return a (dd, di) tuple.
    - dd: extra bytes for the "data" file
    - di: extra bytes for the "index" file

    A healthy revlog will return (0, 0).

    A missing data or index file counts as 0 extra bytes for that file.
    For an inline revlog, dd is always 0 and di is the index file size
    minus the index entries and the data bytes stored in it.
    """
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        dd = actual - expected
    except FileNotFoundError:
        dd = 0

    try:
        # Use a context manager so the handle is released even when
        # seek()/tell() raise (it used to be closed manually, leaking the
        # handle on error).
        with self.opener(self._indexfile) as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        s = self.index.entry_size
        # bytes left over after the last complete index entry
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # index entries and revision data share the same file
            databytes = sum(max(0, self.length(r)) for r in self)
            dd = 0
            di = actual - len(self) * s - databytes
    except FileNotFoundError:
        di = 0

    return (dd, di)
3060
3060
def files(self):
    """List the file paths that make up this revlog.

    The index file always comes first.  Without a docket file, the data
    file is added unless the revlog is inline.  With a docket file, the
    docket itself, the older index/data/sidedata files it reports (empty
    ones excluded), and the current data and sidedata files (only when
    the docket records a non-zero end for them) are listed.
    """
    paths = [self._indexfile]
    if self._docket_file is None:
        if not self._inline:
            paths.append(self._datafile)
        return paths

    docket = self._docket
    paths.append(self._docket_file)
    paths.extend(docket.old_index_filepaths(include_empty=False))
    if docket.data_end:
        paths.append(self._datafile)
    paths.extend(docket.old_data_filepaths(include_empty=False))
    if docket.sidedata_end:
        paths.append(self._sidedatafile)
    paths.extend(docket.old_sidedata_filepaths(include_empty=False))
    return paths
3076
3076
def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
    debug_info=None,
):
    """Emit revisions for ``nodes`` through ``storageutil.emitrevisions``.

    ``nodesorder`` must be one of ``b'nodes'``, ``b'storage'``,
    ``b'linear'`` or ``None``; any other value raises
    ``error.ProgrammingError``. The remaining arguments are forwarded
    unchanged, except that:

    - ``nodesorder`` becomes ``b'storage'`` when it is ``None`` and the
      revlog does not use generaldelta;
    - ``deltamode`` becomes ``CG_DELTAMODE_FULL`` when delta chains are
      not stored, unless ``CG_DELTAMODE_PREV`` was requested.
    """
    known_orders = (b'nodes', b'storage', b'linear', None)
    if nodesorder not in known_orders:
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    if nodesorder is None and not self._generaldelta:
        nodesorder = b'storage'

    wants_prev = deltamode == repository.CG_DELTAMODE_PREV
    if not (self._storedeltachains or wants_prev):
        deltamode = repository.CG_DELTAMODE_FULL

    callbacks = dict(
        deltaparentfn=self.deltaparent,
        candeltafn=self._candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
    )
    return storageutil.emitrevisions(
        self,
        nodes,
        nodesorder,
        revlogrevisiondelta,
        deltamode=deltamode,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions,
        sidedata_helpers=sidedata_helpers,
        debug_info=debug_info,
        **callbacks
    )
3117
3117
# Delta reuse policies accepted by ``clone()`` through ``deltareuse``.
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'

DELTAREUSEFULLADD = b'fulladd'

# The complete set of valid policies.
DELTAREUSEALL = {
    DELTAREUSEALWAYS,
    DELTAREUSESAMEREVS,
    DELTAREUSENEVER,
    DELTAREUSEFULLADD,
}
3125
3125
def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument controls how deltas from the existing
    revlog are preserved in the destination revlog. The argument can have
    the following values:

    DELTAREUSEALWAYS
       Deltas will always be reused (if possible), even if the destination
       revlog would not select the same revisions for the delta. This is
       the fastest mode of operation.
    DELTAREUSESAMEREVS
       Deltas will be reused if the destination revlog would pick the same
       revisions for the delta. This mode strikes a balance between speed
       and optimization.
    DELTAREUSENEVER
       Deltas will never be reused. This is the slowest mode of execution.
       This mode can be used to recompute deltas (e.g. if the diff/delta
       algorithm changes).
    DELTAREUSEFULLADD
       Revisions will be re-added as if they were new content. This is
       slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
       e.g. large file detection and handling.

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``forcedeltabothparents``
    argument controls whether to force compute deltas against both parents
    for merges. When it is falsy, the destination's current setting is
    used.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    Raises ``ValueError`` on an unknown ``deltareuse`` value, a non-empty
    destination, or filtered revisions on either side.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(
            _(b'value for deltareuse invalid: %s') % deltareuse
        )

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # lazydelta and lazydeltabase control whether to reuse a cached delta,
    # if possible. Remember the destination's settings so they can be put
    # back once the copy is over.
    saved = (
        destrevlog._lazydelta,
        destrevlog._lazydeltabase,
        destrevlog._deltabothparents,
    )

    try:
        # policy -> (lazydeltabase, lazydelta); DELTAREUSEFULLADD has no
        # entry and leaves both settings untouched
        lazy_by_policy = {
            self.DELTAREUSEALWAYS: (True, True),
            self.DELTAREUSESAMEREVS: (False, True),
            self.DELTAREUSENEVER: (False, False),
        }
        lazy = lazy_by_policy.get(deltareuse)
        if lazy is not None:
            destrevlog._lazydeltabase, destrevlog._lazydelta = lazy

        destrevlog._deltabothparents = forcedeltabothparents or saved[2]

        with self.reading():
            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

    finally:
        (
            destrevlog._lazydelta,
            destrevlog._lazydeltabase,
            destrevlog._deltabothparents,
        ) = saved
3225
3225
def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """perform the core duty of `revlog.clone` after parameter processing

    Every revision of ``self`` is added to ``destrevlog`` in order:

    - with ``DELTAREUSEFULLADD`` the revision goes through
      ``destrevlog.addrevision``;
    - otherwise it goes through ``destrevlog._addrevision``, handing over
      the stored delta as ``cachedelta`` when ``destrevlog._lazydelta`` is
      set and the revision has a delta parent.

    When ``sidedata_helpers`` is not None, the sidedata and flags of each
    revision are updated through ``sidedatautil.run_sidedata_helpers``.
    ``addrevisioncb``, when set, is called as
    ``addrevisioncb(self, rev, node)`` after each revision.
    ``forcedeltabothparents`` is accepted but not used here.
    """
    write_debug = None
    if self._debug_delta:
        write_debug = tr._report
    deltacomputer = deltautil.deltacomputer(
        destrevlog,
        write_debug=write_debug,
    )
    index = self.index
    fulladd = deltareuse == self.DELTAREUSEFULLADD
    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # (Possibly) reuse the delta from the revlog if allowed and
        # the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        if fulladd:
            text = self._revisiondata(rev)
        else:
            if destrevlog._lazydelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            if not cachedelta:
                rawtext = self._revisiondata(rev)

        sidedata = self.sidedata(rev)

        if sidedata_helpers is not None:
            (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                self, sidedata_helpers, sidedata, rev
            )
            # Add new_flags[0], then drop new_flags[1]. The parentheses
            # matter: `&` binds tighter than `|`, so without them the
            # flags to remove would never be cleared from `flags`.
            flags = (flags | new_flags[0]) & ~new_flags[1]

        if fulladd:
            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            with destrevlog._writing(tr):
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

        if addrevisioncb:
            addrevisioncb(self, rev, node)
3316
3316
def censorrevision(self, tr, censornode, tombstone=b''):
    """Censor ``censornode``, dispatching on the revlog format version.

    Version 0 revlogs cannot be censored and raise ``error.RevlogError``.
    Version 1 revlogs are handled by ``rewrite.v1_censor`` and every other
    version by ``rewrite.v2_censor``.
    """
    version = self._format_version
    if version == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs')
            % self._format_version
        )

    if version == REVLOGV1:
        censor = rewrite.v1_censor
    else:
        censor = rewrite.v2_censor
    censor(self, tr, censornode, tombstone)
3327
3327
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.

    ``state`` is read for ``b'expectedversion'``, ``b'erroroncensored'``
    and the optional ``b'skipflags'``. Its ``b'skipread'`` and
    ``b'safe_renamed'`` entries are reset to empty sets, and nodes that
    are censored or fail to unpack are added to ``b'skipread'``.
    """
    data_extra, index_extra = self.checksize()
    if data_extra:
        yield revlogproblem(
            error=_(b'data length off by %d bytes') % data_extra
        )
    if index_extra:
        yield revlogproblem(
            error=_(b'index contains %d extra bytes') % index_extra
        )

    version = self._format_version

    # The verifier tells us what version revlog we should be.
    if version != state[b'expectedversion']:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.display_id, version, state[b'expectedversion'])
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #   flags()             | 0      | 0      | 0     | not 0
        #   renamed()           | False  | True   | False | ?
        #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common  | rename | meta  | ext
        # -------------------------------------------------
        # rawsize()    | L1      | L1     | L1    | L1
        # size()       | L1      | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2      | L2     | L2    | L2
        # len(text)    | L2      | L2     | L2    | L3
        # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                # only keep the requested flags this revision carries
                skipflags &= self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            indexed_size = self.rawsize(rev)
            unpacked_size = len(self.rawdata(node))

            if indexed_size != unpacked_size:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected')
                    % (unpacked_size, indexed_size),
                    node=node,
                )

        except error.CensoredNodeError:
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
                state[b'skipread'].add(node)
        except Exception as e:
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(e)),
                node=node,
            )
            state[b'skipread'].add(node)
3432
3432
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict of storage facts, one key per requested item.

    - ``b'exclusivefiles'``: ``(opener, path)`` pairs for the index file
      and, when the revlog is not inline, the data file;
    - ``b'sharedfiles'``: always an empty list;
    - ``b'revisionscount'``: ``len(self)``;
    - ``b'trackedsize'``: sum of ``rawsize`` over every revision;
    - ``b'storedsize'``: sum of the on-disk sizes of ``self.files()``.
    """
    info = {}

    if exclusivefiles:
        owned = [(self.opener, self._indexfile)]
        if not self._inline:
            owned.append((self.opener, self._datafile))
        info[b'exclusivefiles'] = owned

    if sharedfiles:
        info[b'sharedfiles'] = []

    if revisionscount:
        info[b'revisionscount'] = len(self)

    if trackedsize:
        info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        total = 0
        for path in self.files():
            total += self.opener.stat(path).st_size
        info[b'storedsize'] = total

    return info
3463
3463
def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
    """Generate sidedata for revisions ``startrev`` to ``endrev`` inclusive.

    The new sidedata is produced by ``sidedatautil.run_sidedata_helpers``
    (called with an empty starting sidedata), appended to the sidedata
    file, and the matching index entries are rewritten in place.

    This is a no-op when the revlog has no sidedata support or when both
    ``helpers[1]`` and ``helpers[2]`` are empty (nothing to generate or
    remove).

    Raises ``error.Abort`` if one of the revisions already has sidedata
    recorded in its index entry.
    """
    if not self.hassidedata:
        return
    # revlog formats with sidedata support does not support inline
    assert not self._inline
    if not helpers[1] and not helpers[2]:
        # Nothing to generate or remove
        return

    new_entries = []
    # append the new sidedata
    with self._writing(transaction):
        ifh, _dfh, sdfh = self._writinghandles
        # New sidedata is written through `sdfh`, so that is the handle
        # that must sit at the end of the known sidedata. (The data file
        # handle used to be seeked here instead, which had no effect on
        # the offset read just below.)
        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        current_offset = sdfh.tell()
        for rev in range(startrev, endrev + 1):
            entry = self.index[rev]
            new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                store=self,
                sidedata_helpers=helpers,
                sidedata={},
                rev=rev,
            )

            serialized_sidedata = sidedatautil.serialize_sidedata(
                new_sidedata
            )

            sidedata_compression_mode = COMP_MODE_INLINE
            if serialized_sidedata and self.hassidedata:
                sidedata_compression_mode = COMP_MODE_PLAIN
                h, comp_sidedata = self.compress(serialized_sidedata)
                # Slice rather than index: on Python 3 `bytes[0]` is an
                # int and never compares equal to a bytes object, which
                # made COMP_MODE_DEFAULT unreachable.
                comp_header = comp_sidedata[0:1]
                if (
                    h != b'u'
                    and comp_header != b'\0'
                    and len(comp_sidedata) < len(serialized_sidedata)
                ):
                    assert not h
                    # NOTE(review): assumes default_compression_header is
                    # a one-byte bytes value -- confirm against the docket
                    if comp_header == self._docket.default_compression_header:
                        sidedata_compression_mode = COMP_MODE_DEFAULT
                    else:
                        sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
            if entry[8] != 0 or entry[9] != 0:
                # rewriting entries that already have sidedata is not
                # supported yet, because it introduces garbage data in the
                # revlog.
                msg = b"rewriting existing sidedata is not supported yet"
                raise error.Abort(msg)

            # Apply (potential) flags to add and to remove after running
            # the sidedata helpers. The parentheses matter: `&` binds
            # tighter than `|`, so without them the flags to remove would
            # never be cleared from the existing entry.
            new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
            entry_update = (
                current_offset,
                len(serialized_sidedata),
                new_offset_flags,
                sidedata_compression_mode,
            )

            # the sidedata computation might have moved the file cursors
            # around
            sdfh.seek(current_offset, os.SEEK_SET)
            sdfh.write(serialized_sidedata)
            new_entries.append(entry_update)
            current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

        # rewrite the new index entries
        ifh.seek(startrev * self.index.entry_size)
        for rev, update in enumerate(new_entries, startrev):
            self.index.replace_sidedata_info(rev, *update)
            packed = self.index.entry_binary(rev)
            if rev == 0 and self._docket is None:
                # without a docket, the first entry also carries the
                # revlog header
                header = self._format_flags | self._format_version
                header = self.index.pack_header(header)
                packed = header + packed
            ifh.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now