revlog: make try block smaller...
Manuel Jacob
r50142:3e5f1fb2 default
@@ -1,3342 +1,3343 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
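# (131072 bytes == 128 KiB)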

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


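# An extension could influence verification by wrapping the function above.
# A hypothetical sketch (wrapper names invented for illustration):
#
#     _orig_verify = _verify_revision
#
#     def _verify_revision(rl, skipflags, state, node):
#         skipflags |= EXTRA_SKIP_FLAGS  # flags the extension asks to skip
#         return _orig_verify(rl, skipflags, state, node)
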
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exist for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but test, debug, or performance measurement code might not
        set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
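            # ``x & (x - 1)`` clears the lowest set bit of ``x``, so for a
            # positive ``x`` it is non-zero exactly when more than one bit is
            # set, i.e. when the size is not a power of two.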
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
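        # The 4-byte header packs 16 feature-flag bits above a 16-bit format
        # version. For example, an inline general-delta v1 revlog has
        # header == REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
        #        == 0x00030001, i.e. flags 0x00030000 and version 1.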

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
656 """The public facing "ID" of the revlog that we use in message"""
656 """The public facing "ID" of the revlog that we use in message"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
749 """iterate over all rev in this revlog (from start to stop)"""
749 """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
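    # For example, an entry whose data lives at byte offset 1000 with flag
    # value 0x1 stores ``(1000 << 16) | 0x1`` in that first field; start()
    # and flags() below undo the packing with a shift and a mask.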
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

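        # Stored parent order is arbitrary; when the first stored parent is
        # null but the second is not, e.g. (nullrev, 5), the canonical order
        # below returns (5, nullrev) so a real parent always comes first.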
        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
986 # Try C implementation.
986 # Try C implementation.
987 try:
987 try:
988 return self.index.deltachain(rev, stoprev, self._generaldelta)
988 return self.index.deltachain(rev, stoprev, self._generaldelta)
989 except AttributeError:
989 except AttributeError:
990 pass
990 pass
991
991
992 chain = []
992 chain = []
993
993
994 # Alias to prevent attribute lookup in tight loop.
994 # Alias to prevent attribute lookup in tight loop.
995 index = self.index
995 index = self.index
996 generaldelta = self._generaldelta
996 generaldelta = self._generaldelta
997
997
998 iterrev = rev
998 iterrev = rev
999 e = index[iterrev]
999 e = index[iterrev]
1000 while iterrev != e[3] and iterrev != stoprev:
1000 while iterrev != e[3] and iterrev != stoprev:
1001 chain.append(iterrev)
1001 chain.append(iterrev)
1002 if generaldelta:
1002 if generaldelta:
1003 iterrev = e[3]
1003 iterrev = e[3]
1004 else:
1004 else:
1005 iterrev -= 1
1005 iterrev -= 1
1006 e = index[iterrev]
1006 e = index[iterrev]
1007
1007
1008 if iterrev == stoprev:
1008 if iterrev == stoprev:
1009 stopped = True
1009 stopped = True
1010 else:
1010 else:
1011 chain.append(iterrev)
1011 chain.append(iterrev)
1012 stopped = False
1012 stopped = False
1013
1013
1014 chain.reverse()
1014 chain.reverse()
1015 return chain, stopped
1015 return chain, stopped
1016
1016
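# ---- editor's sketch -------------------------------------------------------
# The pure-Python fallback above is the generic form of the walk in
# chainbase(): collect every revision on the way down, following the base
# field under generaldelta and rev - 1 otherwise. Condensed into a
# hypothetical standalone helper with the same (chain, stopped) contract:

def toy_deltachain(bases, rev, generaldelta, stoprev=None):
    chain = []
    while bases[rev] != rev and rev != stoprev:
        chain.append(rev)
        rev = bases[rev] if generaldelta else rev - 1
    stopped = rev == stoprev
    if not stopped:
        chain.append(rev)  # include the base (a snapshot) itself
    chain.reverse()
    return chain, stopped

# bases = [0, 0, 1, 1]: toy_deltachain(bases, 3, True) == ([0, 1, 3], False)
# ----------------------------------------------------------------------------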
1017 def ancestors(self, revs, stoprev=0, inclusive=False):
1017 def ancestors(self, revs, stoprev=0, inclusive=False):
1018 """Generate the ancestors of 'revs' in reverse revision order.
1018 """Generate the ancestors of 'revs' in reverse revision order.
1019 Does not generate revs lower than stoprev.
1019 Does not generate revs lower than stoprev.
1020
1020
1021 See the documentation for ancestor.lazyancestors for more details."""
1021 See the documentation for ancestor.lazyancestors for more details."""
1022
1022
1023 # first, make sure start revisions aren't filtered
1023 # first, make sure start revisions aren't filtered
1024 revs = list(revs)
1024 revs = list(revs)
1025 checkrev = self.node
1025 checkrev = self.node
1026 for r in revs:
1026 for r in revs:
1027 checkrev(r)
1027 checkrev(r)
1028 # and we're sure ancestors aren't filtered as well
1028 # and we're sure ancestors aren't filtered as well
1029
1029
1030 if rustancestor is not None and self.index.rust_ext_compat:
1030 if rustancestor is not None and self.index.rust_ext_compat:
1031 lazyancestors = rustancestor.LazyAncestors
1031 lazyancestors = rustancestor.LazyAncestors
1032 arg = self.index
1032 arg = self.index
1033 else:
1033 else:
1034 lazyancestors = ancestor.lazyancestors
1034 lazyancestors = ancestor.lazyancestors
1035 arg = self._uncheckedparentrevs
1035 arg = self._uncheckedparentrevs
1036 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1036 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1037
1037
1038 def descendants(self, revs):
1038 def descendants(self, revs):
1039 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1039 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1040
1040
1041 def findcommonmissing(self, common=None, heads=None):
1041 def findcommonmissing(self, common=None, heads=None):
1042 """Return a tuple of the ancestors of common and the ancestors of heads
1042 """Return a tuple of the ancestors of common and the ancestors of heads
1043 that are not ancestors of common. In revset terminology, we return the
1043 that are not ancestors of common. In revset terminology, we return the
1044 tuple:
1044 tuple:
1045
1045
1046 ::common, (::heads) - (::common)
1046 ::common, (::heads) - (::common)
1047
1047
1048 The list is sorted by revision number, meaning it is
1048 The list is sorted by revision number, meaning it is
1049 topologically sorted.
1049 topologically sorted.
1050
1050
1051 'heads' and 'common' are both lists of node IDs. If heads is
1051 'heads' and 'common' are both lists of node IDs. If heads is
1052 not supplied, uses all of the revlog's heads. If common is not
1052 not supplied, uses all of the revlog's heads. If common is not
1053 supplied, uses nullid."""
1053 supplied, uses nullid."""
1054 if common is None:
1054 if common is None:
1055 common = [self.nullid]
1055 common = [self.nullid]
1056 if heads is None:
1056 if heads is None:
1057 heads = self.heads()
1057 heads = self.heads()
1058
1058
1059 common = [self.rev(n) for n in common]
1059 common = [self.rev(n) for n in common]
1060 heads = [self.rev(n) for n in heads]
1060 heads = [self.rev(n) for n in heads]
1061
1061
1062 # we want the ancestors, but inclusive
1062 # we want the ancestors, but inclusive
1063 class lazyset:
1063 class lazyset:
1064 def __init__(self, lazyvalues):
1064 def __init__(self, lazyvalues):
1065 self.addedvalues = set()
1065 self.addedvalues = set()
1066 self.lazyvalues = lazyvalues
1066 self.lazyvalues = lazyvalues
1067
1067
1068 def __contains__(self, value):
1068 def __contains__(self, value):
1069 return value in self.addedvalues or value in self.lazyvalues
1069 return value in self.addedvalues or value in self.lazyvalues
1070
1070
1071 def __iter__(self):
1071 def __iter__(self):
1072 added = self.addedvalues
1072 added = self.addedvalues
1073 for r in added:
1073 for r in added:
1074 yield r
1074 yield r
1075 for r in self.lazyvalues:
1075 for r in self.lazyvalues:
1076 if r not in added:
1076 if r not in added:
1077 yield r
1077 yield r
1078
1078
1079 def add(self, value):
1079 def add(self, value):
1080 self.addedvalues.add(value)
1080 self.addedvalues.add(value)
1081
1081
1082 def update(self, values):
1082 def update(self, values):
1083 self.addedvalues.update(values)
1083 self.addedvalues.update(values)
1084
1084
1085 has = lazyset(self.ancestors(common))
1085 has = lazyset(self.ancestors(common))
1086 has.add(nullrev)
1086 has.add(nullrev)
1087 has.update(common)
1087 has.update(common)
1088
1088
1089 # take all ancestors from heads that aren't in has
1089 # take all ancestors from heads that aren't in has
1090 missing = set()
1090 missing = set()
1091 visit = collections.deque(r for r in heads if r not in has)
1091 visit = collections.deque(r for r in heads if r not in has)
1092 while visit:
1092 while visit:
1093 r = visit.popleft()
1093 r = visit.popleft()
1094 if r in missing:
1094 if r in missing:
1095 continue
1095 continue
1096 else:
1096 else:
1097 missing.add(r)
1097 missing.add(r)
1098 for p in self.parentrevs(r):
1098 for p in self.parentrevs(r):
1099 if p not in has:
1099 if p not in has:
1100 visit.append(p)
1100 visit.append(p)
1101 missing = list(missing)
1101 missing = list(missing)
1102 missing.sort()
1102 missing.sort()
1103 return has, [self.node(miss) for miss in missing]
1103 return has, [self.node(miss) for miss in missing]
1104
1104
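# ---- editor's sketch -------------------------------------------------------
# In revset terms the method above computes ::common and
# (::heads) - (::common). The second half on a toy DAG, with plain sets in
# place of the lazyset wrapper (hypothetical standalone code; real callers
# pass node IDs, not small integers):

def toy_missing(parents, common, heads):
    def ancestors(revs):
        seen, stack = set(), list(revs)
        while stack:
            r = stack.pop()
            if r != -1 and r not in seen:
                seen.add(r)
                stack.extend(parents[r])
        return seen

    return sorted(ancestors(heads) - ancestors(common))

# parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
# toy_missing(parents, common=[2], heads=[3]) == [3]
# ----------------------------------------------------------------------------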
1105 def incrementalmissingrevs(self, common=None):
1105 def incrementalmissingrevs(self, common=None):
1106 """Return an object that can be used to incrementally compute the
1106 """Return an object that can be used to incrementally compute the
1107 revision numbers of the ancestors of arbitrary sets that are not
1107 revision numbers of the ancestors of arbitrary sets that are not
1108 ancestors of common. This is an ancestor.incrementalmissingancestors
1108 ancestors of common. This is an ancestor.incrementalmissingancestors
1109 object.
1109 object.
1110
1110
1111 'common' is a list of revision numbers. If common is not supplied, uses
1111 'common' is a list of revision numbers. If common is not supplied, uses
1112 nullrev.
1112 nullrev.
1113 """
1113 """
1114 if common is None:
1114 if common is None:
1115 common = [nullrev]
1115 common = [nullrev]
1116
1116
1117 if rustancestor is not None and self.index.rust_ext_compat:
1117 if rustancestor is not None and self.index.rust_ext_compat:
1118 return rustancestor.MissingAncestors(self.index, common)
1118 return rustancestor.MissingAncestors(self.index, common)
1119 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1119 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1120
1120
1121 def findmissingrevs(self, common=None, heads=None):
1121 def findmissingrevs(self, common=None, heads=None):
1122 """Return the revision numbers of the ancestors of heads that
1122 """Return the revision numbers of the ancestors of heads that
1123 are not ancestors of common.
1123 are not ancestors of common.
1124
1124
1125 More specifically, return a list of revision numbers corresponding to
1125 More specifically, return a list of revision numbers corresponding to
1126 nodes N such that every N satisfies the following constraints:
1126 nodes N such that every N satisfies the following constraints:
1127
1127
1128 1. N is an ancestor of some node in 'heads'
1128 1. N is an ancestor of some node in 'heads'
1129 2. N is not an ancestor of any node in 'common'
1129 2. N is not an ancestor of any node in 'common'
1130
1130
1131 The list is sorted by revision number, meaning it is
1131 The list is sorted by revision number, meaning it is
1132 topologically sorted.
1132 topologically sorted.
1133
1133
1134 'heads' and 'common' are both lists of revision numbers. If heads is
1134 'heads' and 'common' are both lists of revision numbers. If heads is
1135 not supplied, uses all of the revlog's heads. If common is not
1135 not supplied, uses all of the revlog's heads. If common is not
1136 supplied, uses nullrev."""
1136 supplied, uses nullrev."""
1137 if common is None:
1137 if common is None:
1138 common = [nullrev]
1138 common = [nullrev]
1139 if heads is None:
1139 if heads is None:
1140 heads = self.headrevs()
1140 heads = self.headrevs()
1141
1141
1142 inc = self.incrementalmissingrevs(common=common)
1142 inc = self.incrementalmissingrevs(common=common)
1143 return inc.missingancestors(heads)
1143 return inc.missingancestors(heads)
1144
1144
1145 def findmissing(self, common=None, heads=None):
1145 def findmissing(self, common=None, heads=None):
1146 """Return the ancestors of heads that are not ancestors of common.
1146 """Return the ancestors of heads that are not ancestors of common.
1147
1147
1148 More specifically, return a list of nodes N such that every N
1148 More specifically, return a list of nodes N such that every N
1149 satisfies the following constraints:
1149 satisfies the following constraints:
1150
1150
1151 1. N is an ancestor of some node in 'heads'
1151 1. N is an ancestor of some node in 'heads'
1152 2. N is not an ancestor of any node in 'common'
1152 2. N is not an ancestor of any node in 'common'
1153
1153
1154 The list is sorted by revision number, meaning it is
1154 The list is sorted by revision number, meaning it is
1155 topologically sorted.
1155 topologically sorted.
1156
1156
1157 'heads' and 'common' are both lists of node IDs. If heads is
1157 'heads' and 'common' are both lists of node IDs. If heads is
1158 not supplied, uses all of the revlog's heads. If common is not
1158 not supplied, uses all of the revlog's heads. If common is not
1159 supplied, uses nullid."""
1159 supplied, uses nullid."""
1160 if common is None:
1160 if common is None:
1161 common = [self.nullid]
1161 common = [self.nullid]
1162 if heads is None:
1162 if heads is None:
1163 heads = self.heads()
1163 heads = self.heads()
1164
1164
1165 common = [self.rev(n) for n in common]
1165 common = [self.rev(n) for n in common]
1166 heads = [self.rev(n) for n in heads]
1166 heads = [self.rev(n) for n in heads]
1167
1167
1168 inc = self.incrementalmissingrevs(common=common)
1168 inc = self.incrementalmissingrevs(common=common)
1169 return [self.node(r) for r in inc.missingancestors(heads)]
1169 return [self.node(r) for r in inc.missingancestors(heads)]
1170
1170
1171 def nodesbetween(self, roots=None, heads=None):
1171 def nodesbetween(self, roots=None, heads=None):
1172 """Return a topological path from 'roots' to 'heads'.
1172 """Return a topological path from 'roots' to 'heads'.
1173
1173
1174 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1174 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1175 topologically sorted list of all nodes N that satisfy both of
1175 topologically sorted list of all nodes N that satisfy both of
1176 these constraints:
1176 these constraints:
1177
1177
1178 1. N is a descendant of some node in 'roots'
1178 1. N is a descendant of some node in 'roots'
1179 2. N is an ancestor of some node in 'heads'
1179 2. N is an ancestor of some node in 'heads'
1180
1180
1181 Every node is considered to be both a descendant and an ancestor
1181 Every node is considered to be both a descendant and an ancestor
1182 of itself, so every reachable node in 'roots' and 'heads' will be
1182 of itself, so every reachable node in 'roots' and 'heads' will be
1183 included in 'nodes'.
1183 included in 'nodes'.
1184
1184
1185 'outroots' is the list of reachable nodes in 'roots', i.e., the
1185 'outroots' is the list of reachable nodes in 'roots', i.e., the
1186 subset of 'roots' that is returned in 'nodes'. Likewise,
1186 subset of 'roots' that is returned in 'nodes'. Likewise,
1187 'outheads' is the subset of 'heads' that is also in 'nodes'.
1187 'outheads' is the subset of 'heads' that is also in 'nodes'.
1188
1188
1189 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1189 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1190 unspecified, uses nullid as the only root. If 'heads' is
1190 unspecified, uses nullid as the only root. If 'heads' is
1191 unspecified, uses list of all of the revlog's heads."""
1191 unspecified, uses list of all of the revlog's heads."""
1192 nonodes = ([], [], [])
1192 nonodes = ([], [], [])
1193 if roots is not None:
1193 if roots is not None:
1194 roots = list(roots)
1194 roots = list(roots)
1195 if not roots:
1195 if not roots:
1196 return nonodes
1196 return nonodes
1197 lowestrev = min([self.rev(n) for n in roots])
1197 lowestrev = min([self.rev(n) for n in roots])
1198 else:
1198 else:
1199 roots = [self.nullid] # Everybody's a descendant of nullid
1199 roots = [self.nullid] # Everybody's a descendant of nullid
1200 lowestrev = nullrev
1200 lowestrev = nullrev
1201 if (lowestrev == nullrev) and (heads is None):
1201 if (lowestrev == nullrev) and (heads is None):
1202 # We want _all_ the nodes!
1202 # We want _all_ the nodes!
1203 return (
1203 return (
1204 [self.node(r) for r in self],
1204 [self.node(r) for r in self],
1205 [self.nullid],
1205 [self.nullid],
1206 list(self.heads()),
1206 list(self.heads()),
1207 )
1207 )
1208 if heads is None:
1208 if heads is None:
1209 # All nodes are ancestors, so the latest ancestor is the last
1209 # All nodes are ancestors, so the latest ancestor is the last
1210 # node.
1210 # node.
1211 highestrev = len(self) - 1
1211 highestrev = len(self) - 1
1212 # Set ancestors to None to signal that every node is an ancestor.
1212 # Set ancestors to None to signal that every node is an ancestor.
1213 ancestors = None
1213 ancestors = None
1214 # Set heads to an empty dictionary for later discovery of heads
1214 # Set heads to an empty dictionary for later discovery of heads
1215 heads = {}
1215 heads = {}
1216 else:
1216 else:
1217 heads = list(heads)
1217 heads = list(heads)
1218 if not heads:
1218 if not heads:
1219 return nonodes
1219 return nonodes
1220 ancestors = set()
1220 ancestors = set()
1221 # Turn heads into a dictionary so we can remove 'fake' heads.
1221 # Turn heads into a dictionary so we can remove 'fake' heads.
1222 # Also, later we will be using it to filter out the heads we can't
1222 # Also, later we will be using it to filter out the heads we can't
1223 # find from roots.
1223 # find from roots.
1224 heads = dict.fromkeys(heads, False)
1224 heads = dict.fromkeys(heads, False)
1225 # Start at the top and keep marking parents until we're done.
1225 # Start at the top and keep marking parents until we're done.
1226 nodestotag = set(heads)
1226 nodestotag = set(heads)
1227 # Remember where the top was so we can use it as a limit later.
1227 # Remember where the top was so we can use it as a limit later.
1228 highestrev = max([self.rev(n) for n in nodestotag])
1228 highestrev = max([self.rev(n) for n in nodestotag])
1229 while nodestotag:
1229 while nodestotag:
1230 # grab a node to tag
1230 # grab a node to tag
1231 n = nodestotag.pop()
1231 n = nodestotag.pop()
1232 # Never tag nullid
1232 # Never tag nullid
1233 if n == self.nullid:
1233 if n == self.nullid:
1234 continue
1234 continue
1235 # A node's revision number represents its place in a
1235 # A node's revision number represents its place in a
1236 # topologically sorted list of nodes.
1236 # topologically sorted list of nodes.
1237 r = self.rev(n)
1237 r = self.rev(n)
1238 if r >= lowestrev:
1238 if r >= lowestrev:
1239 if n not in ancestors:
1239 if n not in ancestors:
1240 # If we are possibly a descendant of one of the roots
1240 # If we are possibly a descendant of one of the roots
1241 # and we haven't already been marked as an ancestor
1241 # and we haven't already been marked as an ancestor
1242 ancestors.add(n) # Mark as ancestor
1242 ancestors.add(n) # Mark as ancestor
1243 # Add non-nullid parents to list of nodes to tag.
1243 # Add non-nullid parents to list of nodes to tag.
1244 nodestotag.update(
1244 nodestotag.update(
1245 [p for p in self.parents(n) if p != self.nullid]
1245 [p for p in self.parents(n) if p != self.nullid]
1246 )
1246 )
1247 elif n in heads: # We've seen it before, is it a fake head?
1247 elif n in heads: # We've seen it before, is it a fake head?
1248 # So it is, real heads should not be the ancestors of
1248 # So it is, real heads should not be the ancestors of
1249 # any other heads.
1249 # any other heads.
1250 heads.pop(n)
1250 heads.pop(n)
1251 if not ancestors:
1251 if not ancestors:
1252 return nonodes
1252 return nonodes
1253 # Now that we have our set of ancestors, we want to remove any
1253 # Now that we have our set of ancestors, we want to remove any
1254 # roots that are not ancestors.
1254 # roots that are not ancestors.
1255
1255
1256 # If one of the roots was nullid, everything is included anyway.
1256 # If one of the roots was nullid, everything is included anyway.
1257 if lowestrev > nullrev:
1257 if lowestrev > nullrev:
1258 # But, since we weren't, let's recompute the lowest rev to not
1258 # But, since we weren't, let's recompute the lowest rev to not
1259 # include roots that aren't ancestors.
1259 # include roots that aren't ancestors.
1260
1260
1261 # Filter out roots that aren't ancestors of heads
1261 # Filter out roots that aren't ancestors of heads
1262 roots = [root for root in roots if root in ancestors]
1262 roots = [root for root in roots if root in ancestors]
1263 # Recompute the lowest revision
1263 # Recompute the lowest revision
1264 if roots:
1264 if roots:
1265 lowestrev = min([self.rev(root) for root in roots])
1265 lowestrev = min([self.rev(root) for root in roots])
1266 else:
1266 else:
1267 # No more roots? Return empty list
1267 # No more roots? Return empty list
1268 return nonodes
1268 return nonodes
1269 else:
1269 else:
1270 # We are descending from nullid, and don't need to care about
1270 # We are descending from nullid, and don't need to care about
1271 # any other roots.
1271 # any other roots.
1272 lowestrev = nullrev
1272 lowestrev = nullrev
1273 roots = [self.nullid]
1273 roots = [self.nullid]
1274 # Transform our roots list into a set.
1274 # Transform our roots list into a set.
1275 descendants = set(roots)
1275 descendants = set(roots)
1276 # Also, keep the original roots so we can filter out roots that aren't
1276 # Also, keep the original roots so we can filter out roots that aren't
1277 # 'real' roots (i.e. are descended from other roots).
1277 # 'real' roots (i.e. are descended from other roots).
1278 roots = descendants.copy()
1278 roots = descendants.copy()
1279 # Our topologically sorted list of output nodes.
1279 # Our topologically sorted list of output nodes.
1280 orderedout = []
1280 orderedout = []
1281 # Don't start at nullid since we don't want nullid in our output list,
1281 # Don't start at nullid since we don't want nullid in our output list,
1282 # and if nullid shows up in descendants, empty parents will look like
1282 # and if nullid shows up in descendants, empty parents will look like
1283 # they're descendants.
1283 # they're descendants.
1284 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1284 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1285 n = self.node(r)
1285 n = self.node(r)
1286 isdescendant = False
1286 isdescendant = False
1287 if lowestrev == nullrev: # Everybody is a descendant of nullid
1287 if lowestrev == nullrev: # Everybody is a descendant of nullid
1288 isdescendant = True
1288 isdescendant = True
1289 elif n in descendants:
1289 elif n in descendants:
1290 # n is already a descendant
1290 # n is already a descendant
1291 isdescendant = True
1291 isdescendant = True
1292 # This check only needs to be done here because all the roots
1292 # This check only needs to be done here because all the roots
1293 # will start being marked as descendants before the loop.
1293 # will start being marked as descendants before the loop.
1294 if n in roots:
1294 if n in roots:
1295 # If n was a root, check if it's a 'real' root.
1295 # If n was a root, check if it's a 'real' root.
1296 p = tuple(self.parents(n))
1296 p = tuple(self.parents(n))
1297 # If any of its parents are descendants, it's not a root.
1297 # If any of its parents are descendants, it's not a root.
1298 if (p[0] in descendants) or (p[1] in descendants):
1298 if (p[0] in descendants) or (p[1] in descendants):
1299 roots.remove(n)
1299 roots.remove(n)
1300 else:
1300 else:
1301 p = tuple(self.parents(n))
1301 p = tuple(self.parents(n))
1302 # A node is a descendant if either of its parents is a
1302 # A node is a descendant if either of its parents is a
1303 # descendant. (We seeded the descendants set with the roots
1303 # descendant. (We seeded the descendants set with the roots
1304 # up there, remember?)
1304 # up there, remember?)
1305 if (p[0] in descendants) or (p[1] in descendants):
1305 if (p[0] in descendants) or (p[1] in descendants):
1306 descendants.add(n)
1306 descendants.add(n)
1307 isdescendant = True
1307 isdescendant = True
1308 if isdescendant and ((ancestors is None) or (n in ancestors)):
1308 if isdescendant and ((ancestors is None) or (n in ancestors)):
1309 # Only include nodes that are both descendants and ancestors.
1309 # Only include nodes that are both descendants and ancestors.
1310 orderedout.append(n)
1310 orderedout.append(n)
1311 if (ancestors is not None) and (n in heads):
1311 if (ancestors is not None) and (n in heads):
1312 # We're trying to figure out which heads are reachable
1312 # We're trying to figure out which heads are reachable
1313 # from roots.
1313 # from roots.
1314 # Mark this head as having been reached
1314 # Mark this head as having been reached
1315 heads[n] = True
1315 heads[n] = True
1316 elif ancestors is None:
1316 elif ancestors is None:
1317 # Otherwise, we're trying to discover the heads.
1317 # Otherwise, we're trying to discover the heads.
1318 # Assume this is a head because if it isn't, the next step
1318 # Assume this is a head because if it isn't, the next step
1319 # will eventually remove it.
1319 # will eventually remove it.
1320 heads[n] = True
1320 heads[n] = True
1321 # But, obviously its parents aren't.
1321 # But, obviously its parents aren't.
1322 for p in self.parents(n):
1322 for p in self.parents(n):
1323 heads.pop(p, None)
1323 heads.pop(p, None)
1324 heads = [head for head, flag in heads.items() if flag]
1324 heads = [head for head, flag in heads.items() if flag]
1325 roots = list(roots)
1325 roots = list(roots)
1326 assert orderedout
1326 assert orderedout
1327 assert roots
1327 assert roots
1328 assert heads
1328 assert heads
1329 return (orderedout, roots, heads)
1329 return (orderedout, roots, heads)
1330
1330
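# ---- editor's sketch -------------------------------------------------------
# Stripped of the head-discovery and 'fake root' bookkeeping, the core of
# nodesbetween() is descendants(roots) & ancestors(heads), emitted in
# revision-number (hence topological) order. A compact equivalent on a toy
# DAG (hypothetical standalone code):

def toy_nodesbetween(parents, roots, heads):
    ancs, stack = set(), list(heads)
    while stack:  # collect ::heads
        r = stack.pop()
        if r != -1 and r not in ancs:
            ancs.add(r)
            stack.extend(parents[r])
    descs = set(roots)
    for r in sorted(ancs):  # grow roots:: upward, bounded by ::heads
        if r not in descs and any(p in descs for p in parents[r]):
            descs.add(r)
    return sorted(descs & ancs)

# parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
# toy_nodesbetween(parents, roots=[1], heads=[3]) == [1, 3]
# ----------------------------------------------------------------------------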
1331 def headrevs(self, revs=None):
1331 def headrevs(self, revs=None):
1332 if revs is None:
1332 if revs is None:
1333 try:
1333 try:
1334 return self.index.headrevs()
1334 return self.index.headrevs()
1335 except AttributeError:
1335 except AttributeError:
1336 return self._headrevs()
1336 return self._headrevs()
1337 if rustdagop is not None and self.index.rust_ext_compat:
1337 if rustdagop is not None and self.index.rust_ext_compat:
1338 return rustdagop.headrevs(self.index, revs)
1338 return rustdagop.headrevs(self.index, revs)
1339 return dagop.headrevs(revs, self._uncheckedparentrevs)
1339 return dagop.headrevs(revs, self._uncheckedparentrevs)
1340
1340
1341 def computephases(self, roots):
1341 def computephases(self, roots):
1342 return self.index.computephasesmapsets(roots)
1342 return self.index.computephasesmapsets(roots)
1343
1343
1344 def _headrevs(self):
1344 def _headrevs(self):
1345 count = len(self)
1345 count = len(self)
1346 if not count:
1346 if not count:
1347 return [nullrev]
1347 return [nullrev]
1348 # we won't iterate over filtered revs, so nobody is a head at the start
1348 # we won't iterate over filtered revs, so nobody is a head at the start
1349 ishead = [0] * (count + 1)
1349 ishead = [0] * (count + 1)
1350 index = self.index
1350 index = self.index
1351 for r in self:
1351 for r in self:
1352 ishead[r] = 1 # I may be a head
1352 ishead[r] = 1 # I may be a head
1353 e = index[r]
1353 e = index[r]
1354 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1354 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1355 return [r for r, val in enumerate(ishead) if val]
1355 return [r for r, val in enumerate(ishead) if val]
1356
1356
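# ---- editor's sketch -------------------------------------------------------
# The fallback above marks every revision as a candidate head, then clears
# the mark on each revision's parents; whatever stays marked has no
# children. The same idea as a hypothetical standalone function:

def toy_headrevs(parents, count):
    ishead = [True] * count
    for r in range(count):
        for p in parents[r]:
            if p != -1:
                ishead[p] = False
    return [r for r in range(count) if ishead[r]]

# parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}
# toy_headrevs(parents, 3) == [1, 2]
# ----------------------------------------------------------------------------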
1357 def heads(self, start=None, stop=None):
1357 def heads(self, start=None, stop=None):
1358 """return the list of all nodes that have no children
1358 """return the list of all nodes that have no children
1359
1359
1360 if start is specified, only heads that are descendants of
1360 if start is specified, only heads that are descendants of
1361 start will be returned
1361 start will be returned
1362 if stop is specified, it will consider all the revs from stop
1362 if stop is specified, it will consider all the revs from stop
1363 as if they had no children
1363 as if they had no children
1364 """
1364 """
1365 if start is None and stop is None:
1365 if start is None and stop is None:
1366 if not len(self):
1366 if not len(self):
1367 return [self.nullid]
1367 return [self.nullid]
1368 return [self.node(r) for r in self.headrevs()]
1368 return [self.node(r) for r in self.headrevs()]
1369
1369
1370 if start is None:
1370 if start is None:
1371 start = nullrev
1371 start = nullrev
1372 else:
1372 else:
1373 start = self.rev(start)
1373 start = self.rev(start)
1374
1374
1375 stoprevs = {self.rev(n) for n in stop or []}
1375 stoprevs = {self.rev(n) for n in stop or []}
1376
1376
1377 revs = dagop.headrevssubset(
1377 revs = dagop.headrevssubset(
1378 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1378 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1379 )
1379 )
1380
1380
1381 return [self.node(rev) for rev in revs]
1381 return [self.node(rev) for rev in revs]
1382
1382
1383 def children(self, node):
1383 def children(self, node):
1384 """find the children of a given node"""
1384 """find the children of a given node"""
1385 c = []
1385 c = []
1386 p = self.rev(node)
1386 p = self.rev(node)
1387 for r in self.revs(start=p + 1):
1387 for r in self.revs(start=p + 1):
1388 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1388 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1389 if prevs:
1389 if prevs:
1390 for pr in prevs:
1390 for pr in prevs:
1391 if pr == p:
1391 if pr == p:
1392 c.append(self.node(r))
1392 c.append(self.node(r))
1393 elif p == nullrev:
1393 elif p == nullrev:
1394 c.append(self.node(r))
1394 c.append(self.node(r))
1395 return c
1395 return c
1396
1396
1397 def commonancestorsheads(self, a, b):
1397 def commonancestorsheads(self, a, b):
1398 """calculate all the heads of the common ancestors of nodes a and b"""
1398 """calculate all the heads of the common ancestors of nodes a and b"""
1399 a, b = self.rev(a), self.rev(b)
1399 a, b = self.rev(a), self.rev(b)
1400 ancs = self._commonancestorsheads(a, b)
1400 ancs = self._commonancestorsheads(a, b)
1401 return pycompat.maplist(self.node, ancs)
1401 return pycompat.maplist(self.node, ancs)
1402
1402
1403 def _commonancestorsheads(self, *revs):
1403 def _commonancestorsheads(self, *revs):
1404 """calculate all the heads of the common ancestors of revs"""
1404 """calculate all the heads of the common ancestors of revs"""
1405 try:
1405 try:
1406 ancs = self.index.commonancestorsheads(*revs)
1406 ancs = self.index.commonancestorsheads(*revs)
1407 except (AttributeError, OverflowError): # C implementation failed
1407 except (AttributeError, OverflowError): # C implementation failed
1408 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1408 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1409 return ancs
1409 return ancs
1410
1410
1411 def isancestor(self, a, b):
1411 def isancestor(self, a, b):
1412 """return True if node a is an ancestor of node b
1412 """return True if node a is an ancestor of node b
1413
1413
1414 A revision is considered an ancestor of itself."""
1414 A revision is considered an ancestor of itself."""
1415 a, b = self.rev(a), self.rev(b)
1415 a, b = self.rev(a), self.rev(b)
1416 return self.isancestorrev(a, b)
1416 return self.isancestorrev(a, b)
1417
1417
1418 def isancestorrev(self, a, b):
1418 def isancestorrev(self, a, b):
1419 """return True if revision a is an ancestor of revision b
1419 """return True if revision a is an ancestor of revision b
1420
1420
1421 A revision is considered an ancestor of itself.
1421 A revision is considered an ancestor of itself.
1422
1422
1423 The implementation of this is trivial but the use of
1423 The implementation of this is trivial but the use of
1424 reachableroots is not."""
1424 reachableroots is not."""
1425 if a == nullrev:
1425 if a == nullrev:
1426 return True
1426 return True
1427 elif a == b:
1427 elif a == b:
1428 return True
1428 return True
1429 elif a > b:
1429 elif a > b:
1430 return False
1430 return False
1431 return bool(self.reachableroots(a, [b], [a], includepath=False))
1431 return bool(self.reachableroots(a, [b], [a], includepath=False))
1432
1432
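# ---- editor's sketch -------------------------------------------------------
# The early returns above rely on the revlog invariant that a parent's
# revision number is always smaller than its child's, so a > b can never
# mean "a is an ancestor of b". A brute-force reference version of the
# predicate that exploits the same invariant for pruning (hypothetical
# standalone code):

def toy_isancestorrev(parents, a, b):
    if a == -1:
        return True  # the null revision precedes everything
    seen, stack = set(), [b]
    while stack:
        r = stack.pop()
        if r == a:
            return True  # a revision is its own ancestor
        if r != -1 and r > a and r not in seen:
            seen.add(r)  # ancestors numbered below a cannot reach a
            stack.extend(parents[r])
    return False
# ----------------------------------------------------------------------------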
1433 def reachableroots(self, minroot, heads, roots, includepath=False):
1433 def reachableroots(self, minroot, heads, roots, includepath=False):
1434 """return (heads(::(<roots> and <roots>::<heads>)))
1434 """return (heads(::(<roots> and <roots>::<heads>)))
1435
1435
1436 If includepath is True, return (<roots>::<heads>)."""
1436 If includepath is True, return (<roots>::<heads>)."""
1437 try:
1437 try:
1438 return self.index.reachableroots2(
1438 return self.index.reachableroots2(
1439 minroot, heads, roots, includepath
1439 minroot, heads, roots, includepath
1440 )
1440 )
1441 except AttributeError:
1441 except AttributeError:
1442 return dagop._reachablerootspure(
1442 return dagop._reachablerootspure(
1443 self.parentrevs, minroot, roots, heads, includepath
1443 self.parentrevs, minroot, roots, heads, includepath
1444 )
1444 )
1445
1445
1446 def ancestor(self, a, b):
1446 def ancestor(self, a, b):
1447 """calculate the "best" common ancestor of nodes a and b"""
1447 """calculate the "best" common ancestor of nodes a and b"""
1448
1448
1449 a, b = self.rev(a), self.rev(b)
1449 a, b = self.rev(a), self.rev(b)
1450 try:
1450 try:
1451 ancs = self.index.ancestors(a, b)
1451 ancs = self.index.ancestors(a, b)
1452 except (AttributeError, OverflowError):
1452 except (AttributeError, OverflowError):
1453 ancs = ancestor.ancestors(self.parentrevs, a, b)
1453 ancs = ancestor.ancestors(self.parentrevs, a, b)
1454 if ancs:
1454 if ancs:
1455 # choose a consistent winner when there's a tie
1455 # choose a consistent winner when there's a tie
1456 return min(map(self.node, ancs))
1456 return min(map(self.node, ancs))
1457 return self.nullid
1457 return self.nullid
1458
1458
1459 def _match(self, id):
1459 def _match(self, id):
1460 if isinstance(id, int):
1460 if isinstance(id, int):
1461 # rev
1461 # rev
1462 return self.node(id)
1462 return self.node(id)
1463 if len(id) == self.nodeconstants.nodelen:
1463 if len(id) == self.nodeconstants.nodelen:
1464 # possibly a binary node
1464 # possibly a binary node
1465 # odds of a binary node being all hex in ASCII are 1 in 10**25
1465 # odds of a binary node being all hex in ASCII are 1 in 10**25
1466 try:
1466 try:
1467 node = id
1467 node = id
1468 self.rev(node) # quick search the index
1468 self.rev(node) # quick search the index
1469 return node
1469 return node
1470 except error.LookupError:
1470 except error.LookupError:
1471 pass # may be partial hex id
1471 pass # may be partial hex id
1472 try:
1472 try:
1473 # str(rev)
1473 # str(rev)
1474 rev = int(id)
1474 rev = int(id)
1475 if b"%d" % rev != id:
1475 if b"%d" % rev != id:
1476 raise ValueError
1476 raise ValueError
1477 if rev < 0:
1477 if rev < 0:
1478 rev = len(self) + rev
1478 rev = len(self) + rev
1479 if rev < 0 or rev >= len(self):
1479 if rev < 0 or rev >= len(self):
1480 raise ValueError
1480 raise ValueError
1481 return self.node(rev)
1481 return self.node(rev)
1482 except (ValueError, OverflowError):
1482 except (ValueError, OverflowError):
1483 pass
1483 pass
1484 if len(id) == 2 * self.nodeconstants.nodelen:
1484 if len(id) == 2 * self.nodeconstants.nodelen:
1485 try:
1485 try:
1486 # a full hex nodeid?
1486 # a full hex nodeid?
1487 node = bin(id)
1487 node = bin(id)
1488 self.rev(node)
1488 self.rev(node)
1489 return node
1489 return node
1490 except (TypeError, error.LookupError):
1490 except (TypeError, error.LookupError):
1491 pass
1491 pass
1492
1492
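# ---- editor's sketch -------------------------------------------------------
# _match() resolves a user-supplied id by trying, in order: an integer
# revision, a 20-byte binary node, a decimal revision string, and a full
# 40-char hex node. The same cascade in miniature over a {node: rev} map
# (hypothetical standalone code; the real method also handles negative
# revisions and raises LookupError through self.rev()):

import binascii

def toy_match(nodemap, id):
    if isinstance(id, int):                    # rev
        return id
    if len(id) == 20 and id in nodemap:        # binary node
        return nodemap[id]
    try:                                       # b"123"-style revision
        rev = int(id)
        if b'%d' % rev == id:                  # reject b"0123" and friends
            return rev
    except ValueError:
        pass
    if len(id) == 40:                          # full hex node
        try:
            return nodemap.get(binascii.unhexlify(id))
        except binascii.Error:
            pass
    return None
# ----------------------------------------------------------------------------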
1493 def _partialmatch(self, id):
1493 def _partialmatch(self, id):
1494 # we don't care about wdirfilenodeids as they should always be full hashes
1494 # we don't care about wdirfilenodeids as they should always be full hashes
1495 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1495 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1496 ambiguous = False
1496 ambiguous = False
1497 try:
1497 try:
1498 partial = self.index.partialmatch(id)
1498 partial = self.index.partialmatch(id)
1499 if partial and self.hasnode(partial):
1499 if partial and self.hasnode(partial):
1500 if maybewdir:
1500 if maybewdir:
1501 # single 'ff...' match in radix tree, ambiguous with wdir
1501 # single 'ff...' match in radix tree, ambiguous with wdir
1502 ambiguous = True
1502 ambiguous = True
1503 else:
1503 else:
1504 return partial
1504 return partial
1505 elif maybewdir:
1505 elif maybewdir:
1506 # no 'ff...' match in radix tree, wdir identified
1506 # no 'ff...' match in radix tree, wdir identified
1507 raise error.WdirUnsupported
1507 raise error.WdirUnsupported
1508 else:
1508 else:
1509 return None
1509 return None
1510 except error.RevlogError:
1510 except error.RevlogError:
1511 # parsers.c radix tree lookup gave multiple matches
1511 # parsers.c radix tree lookup gave multiple matches
1512 # fast path: for unfiltered changelog, radix tree is accurate
1512 # fast path: for unfiltered changelog, radix tree is accurate
1513 if not getattr(self, 'filteredrevs', None):
1513 if not getattr(self, 'filteredrevs', None):
1514 ambiguous = True
1514 ambiguous = True
1515 # fall through to slow path that filters hidden revisions
1515 # fall through to slow path that filters hidden revisions
1516 except (AttributeError, ValueError):
1516 except (AttributeError, ValueError):
1517 # we are pure python, or key was too short to search radix tree
1517 # we are pure python, or key was too short to search radix tree
1518 pass
1518 pass
1519 if ambiguous:
1519 if ambiguous:
1520 raise error.AmbiguousPrefixLookupError(
1520 raise error.AmbiguousPrefixLookupError(
1521 id, self.display_id, _(b'ambiguous identifier')
1521 id, self.display_id, _(b'ambiguous identifier')
1522 )
1522 )
1523
1523
1524 if id in self._pcache:
1524 if id in self._pcache:
1525 return self._pcache[id]
1525 return self._pcache[id]
1526
1526
1527 if len(id) <= 40:
1527 if len(id) <= 40:
1528 - try:
1529 - # hex(node)[:...]
1530 - l = len(id) // 2 * 2 # grab an even number of digits
1528 + # hex(node)[:...]
1529 + l = len(id) // 2 * 2 # grab an even number of digits
1530 + try:
1531 prefix = bin(id[:l])
1531 prefix = bin(id[:l])
1532 + except TypeError:
1533 + pass
1534 + else:
1532 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1535 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1533 nl = [
1536 nl = [
1534 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1537 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1535 ]
1538 ]
1536 if self.nodeconstants.nullhex.startswith(id):
1539 if self.nodeconstants.nullhex.startswith(id):
1537 nl.append(self.nullid)
1540 nl.append(self.nullid)
1538 if len(nl) > 0:
1541 if len(nl) > 0:
1539 if len(nl) == 1 and not maybewdir:
1542 if len(nl) == 1 and not maybewdir:
1540 self._pcache[id] = nl[0]
1543 self._pcache[id] = nl[0]
1541 return nl[0]
1544 return nl[0]
1542 raise error.AmbiguousPrefixLookupError(
1545 raise error.AmbiguousPrefixLookupError(
1543 id, self.display_id, _(b'ambiguous identifier')
1546 id, self.display_id, _(b'ambiguous identifier')
1544 )
1547 )
1545 if maybewdir:
1548 if maybewdir:
1546 raise error.WdirUnsupported
1549 raise error.WdirUnsupported
1547 return None
1550 return None
1548 - except TypeError:
1549 - pass
1550
1551
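# ---- editor's note ---------------------------------------------------------
# The hunk above is the whole point of this changeset ("revlog: make try
# block smaller"): only bin(), the one call expected to raise TypeError,
# remains inside try, and the lookup work moves to the else: branch so its
# own exceptions can no longer be swallowed as if the prefix were malformed.
# The pattern in isolation (hypothetical standalone example; bytes.fromhex
# raises ValueError rather than TypeError, hence the different clause):

def parse_prefix(id):
    l = len(id) // 2 * 2  # cannot raise; belongs outside the try
    try:
        prefix = bytes.fromhex(id[:l])  # the only risky statement
    except ValueError:
        return None  # malformed hex: genuinely no match
    else:
        # work done here may raise freely without being misreported
        return prefix
# ----------------------------------------------------------------------------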
1551 def lookup(self, id):
1552 def lookup(self, id):
1552 """locate a node based on:
1553 """locate a node based on:
1553 - revision number or str(revision number)
1554 - revision number or str(revision number)
1554 - nodeid or subset of hex nodeid
1555 - nodeid or subset of hex nodeid
1555 """
1556 """
1556 n = self._match(id)
1557 n = self._match(id)
1557 if n is not None:
1558 if n is not None:
1558 return n
1559 return n
1559 n = self._partialmatch(id)
1560 n = self._partialmatch(id)
1560 if n:
1561 if n:
1561 return n
1562 return n
1562
1563
1563 raise error.LookupError(id, self.display_id, _(b'no match found'))
1564 raise error.LookupError(id, self.display_id, _(b'no match found'))
1564
1565
1565 def shortest(self, node, minlength=1):
1566 def shortest(self, node, minlength=1):
1566 """Find the shortest unambiguous prefix that matches node."""
1567 """Find the shortest unambiguous prefix that matches node."""
1567
1568
1568 def isvalid(prefix):
1569 def isvalid(prefix):
1569 try:
1570 try:
1570 matchednode = self._partialmatch(prefix)
1571 matchednode = self._partialmatch(prefix)
1571 except error.AmbiguousPrefixLookupError:
1572 except error.AmbiguousPrefixLookupError:
1572 return False
1573 return False
1573 except error.WdirUnsupported:
1574 except error.WdirUnsupported:
1574 # single 'ff...' match
1575 # single 'ff...' match
1575 return True
1576 return True
1576 if matchednode is None:
1577 if matchednode is None:
1577 raise error.LookupError(node, self.display_id, _(b'no node'))
1578 raise error.LookupError(node, self.display_id, _(b'no node'))
1578 return True
1579 return True
1579
1580
1580 def maybewdir(prefix):
1581 def maybewdir(prefix):
1581 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1582 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1582
1583
1583 hexnode = hex(node)
1584 hexnode = hex(node)
1584
1585
1585 def disambiguate(hexnode, minlength):
1586 def disambiguate(hexnode, minlength):
1586 """Disambiguate against wdirid."""
1587 """Disambiguate against wdirid."""
1587 for length in range(minlength, len(hexnode) + 1):
1588 for length in range(minlength, len(hexnode) + 1):
1588 prefix = hexnode[:length]
1589 prefix = hexnode[:length]
1589 if not maybewdir(prefix):
1590 if not maybewdir(prefix):
1590 return prefix
1591 return prefix
1591
1592
1592 if not getattr(self, 'filteredrevs', None):
1593 if not getattr(self, 'filteredrevs', None):
1593 try:
1594 try:
1594 length = max(self.index.shortest(node), minlength)
1595 length = max(self.index.shortest(node), minlength)
1595 return disambiguate(hexnode, length)
1596 return disambiguate(hexnode, length)
1596 except error.RevlogError:
1597 except error.RevlogError:
1597 if node != self.nodeconstants.wdirid:
1598 if node != self.nodeconstants.wdirid:
1598 raise error.LookupError(
1599 raise error.LookupError(
1599 node, self.display_id, _(b'no node')
1600 node, self.display_id, _(b'no node')
1600 )
1601 )
1601 except AttributeError:
1602 except AttributeError:
1602 # Fall through to pure code
1603 # Fall through to pure code
1603 pass
1604 pass
1604
1605
1605 if node == self.nodeconstants.wdirid:
1606 if node == self.nodeconstants.wdirid:
1606 for length in range(minlength, len(hexnode) + 1):
1607 for length in range(minlength, len(hexnode) + 1):
1607 prefix = hexnode[:length]
1608 prefix = hexnode[:length]
1608 if isvalid(prefix):
1609 if isvalid(prefix):
1609 return prefix
1610 return prefix
1610
1611
1611 for length in range(minlength, len(hexnode) + 1):
1612 for length in range(minlength, len(hexnode) + 1):
1612 prefix = hexnode[:length]
1613 prefix = hexnode[:length]
1613 if isvalid(prefix):
1614 if isvalid(prefix):
1614 return disambiguate(hexnode, length)
1615 return disambiguate(hexnode, length)
1615
1616
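# ---- editor's sketch -------------------------------------------------------
# shortest() grows the candidate prefix one hex digit at a time until
# _partialmatch() stops reporting it as ambiguous (plus the wdir
# disambiguation above). The same search over a plain list of hex node ids
# (hypothetical standalone code):

def toy_shortest(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if [n for n in hexnodes if n.startswith(prefix)] == [hexnode]:
            return prefix
    return hexnode

# toy_shortest(['abc123', 'abd456'], 'abc123') == 'abc'
# ----------------------------------------------------------------------------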
1616 def cmp(self, node, text):
1617 def cmp(self, node, text):
1617 """compare text with a given file revision
1618 """compare text with a given file revision
1618
1619
1619 returns True if text is different than what is stored.
1620 returns True if text is different than what is stored.
1620 """
1621 """
1621 p1, p2 = self.parents(node)
1622 p1, p2 = self.parents(node)
1622 return storageutil.hashrevisionsha1(text, p1, p2) != node
1623 return storageutil.hashrevisionsha1(text, p1, p2) != node
1623
1624
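# ---- editor's sketch -------------------------------------------------------
# cmp() recomputes the SHA-1 that names a revision and compares it with the
# stored node, so it can answer "did the text change?" without reading the
# stored revision back. For classic SHA-1 revlogs that hash covers the two
# parent nodes, sorted, followed by the text -- roughly (a simplification
# of storageutil.hashrevisionsha1):

import hashlib

def toy_hashrevision(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()
# ----------------------------------------------------------------------------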
1624 def _getsegmentforrevs(self, startrev, endrev, df=None):
1625 def _getsegmentforrevs(self, startrev, endrev, df=None):
1625 """Obtain a segment of raw data corresponding to a range of revisions.
1626 """Obtain a segment of raw data corresponding to a range of revisions.
1626
1627
1627 Accepts the start and end revisions and an optional already-open
1628 Accepts the start and end revisions and an optional already-open
1628 file handle to be used for reading. If the file handle is read, its
1629 file handle to be used for reading. If the file handle is read, its
1629 seek position will not be preserved.
1630 seek position will not be preserved.
1630
1631
1631 Requests for data may be satisfied by a cache.
1632 Requests for data may be satisfied by a cache.
1632
1633
1633 Returns a 2-tuple of (offset, data) for the requested range of
1634 Returns a 2-tuple of (offset, data) for the requested range of
1634 revisions. Offset is the integer offset from the beginning of the
1635 revisions. Offset is the integer offset from the beginning of the
1635 revlog and data is a str or buffer of the raw byte data.
1636 revlog and data is a str or buffer of the raw byte data.
1636
1637
1637 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1638 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1638 to determine where each revision's data begins and ends.
1639 to determine where each revision's data begins and ends.
1639 """
1640 """
1640 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1641 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1641 # (functions are expensive).
1642 # (functions are expensive).
1642 index = self.index
1643 index = self.index
1643 istart = index[startrev]
1644 istart = index[startrev]
1644 start = int(istart[0] >> 16)
1645 start = int(istart[0] >> 16)
1645 if startrev == endrev:
1646 if startrev == endrev:
1646 end = start + istart[1]
1647 end = start + istart[1]
1647 else:
1648 else:
1648 iend = index[endrev]
1649 iend = index[endrev]
1649 end = int(iend[0] >> 16) + iend[1]
1650 end = int(iend[0] >> 16) + iend[1]
1650
1651
1651 if self._inline:
1652 if self._inline:
1652 start += (startrev + 1) * self.index.entry_size
1653 start += (startrev + 1) * self.index.entry_size
1653 end += (endrev + 1) * self.index.entry_size
1654 end += (endrev + 1) * self.index.entry_size
1654 length = end - start
1655 length = end - start
1655
1656
1656 return start, self._segmentfile.read_chunk(start, length, df)
1657 return start, self._segmentfile.read_chunk(start, length, df)
1657
1658
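# ---- editor's sketch -------------------------------------------------------
# For inline revlogs the data chunks are interleaved with the fixed-size
# index entries in one file, which is why start and end above get shifted
# by (rev + 1) * entry_size. The address arithmetic on toy data
# (hypothetical standalone code; 64 is assumed as the entry size, matching
# the v1 index record):

def toy_segment_bounds(offsets, lengths, startrev, endrev,
                       inline=False, entry_size=64):
    start = offsets[startrev]
    end = offsets[endrev] + lengths[endrev]
    if inline:
        start += (startrev + 1) * entry_size  # skip interleaved index
        end += (endrev + 1) * entry_size      # entries before each chunk
    return start, end - start

# offsets = [0, 10, 30]; lengths = [10, 20, 5]
# toy_segment_bounds(offsets, lengths, 0, 2) == (0, 35)
# ----------------------------------------------------------------------------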
1658 def _chunk(self, rev, df=None):
1659 def _chunk(self, rev, df=None):
1659 """Obtain a single decompressed chunk for a revision.
1660 """Obtain a single decompressed chunk for a revision.
1660
1661
1661 Accepts an integer revision and an optional already-open file handle
1662 Accepts an integer revision and an optional already-open file handle
1662 to be used for reading. If used, the seek position of the file will not
1663 to be used for reading. If used, the seek position of the file will not
1663 be preserved.
1664 be preserved.
1664
1665
1665 Returns a str holding uncompressed data for the requested revision.
1666 Returns a str holding uncompressed data for the requested revision.
1666 """
1667 """
1667 compression_mode = self.index[rev][10]
1668 compression_mode = self.index[rev][10]
1668 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1669 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1669 if compression_mode == COMP_MODE_PLAIN:
1670 if compression_mode == COMP_MODE_PLAIN:
1670 return data
1671 return data
1671 elif compression_mode == COMP_MODE_DEFAULT:
1672 elif compression_mode == COMP_MODE_DEFAULT:
1672 return self._decompressor(data)
1673 return self._decompressor(data)
1673 elif compression_mode == COMP_MODE_INLINE:
1674 elif compression_mode == COMP_MODE_INLINE:
1674 return self.decompress(data)
1675 return self.decompress(data)
1675 else:
1676 else:
1676 msg = b'unknown compression mode %d'
1677 msg = b'unknown compression mode %d'
1677 msg %= compression_mode
1678 msg %= compression_mode
1678 raise error.RevlogError(msg)
1679 raise error.RevlogError(msg)
1679
1680
1680 def _chunks(self, revs, df=None, targetsize=None):
1681 def _chunks(self, revs, df=None, targetsize=None):
1681 """Obtain decompressed chunks for the specified revisions.
1682 """Obtain decompressed chunks for the specified revisions.
1682
1683
1683 Accepts an iterable of numeric revisions that are assumed to be in
1684 Accepts an iterable of numeric revisions that are assumed to be in
1684 ascending order. Also accepts an optional already-open file handle
1685 ascending order. Also accepts an optional already-open file handle
1685 to be used for reading. If used, the seek position of the file will
1686 to be used for reading. If used, the seek position of the file will
1686 not be preserved.
1687 not be preserved.
1687
1688
1688 This function is similar to calling ``self._chunk()`` multiple times,
1689 This function is similar to calling ``self._chunk()`` multiple times,
1689 but is faster.
1690 but is faster.
1690
1691
1691 Returns a list with decompressed data for each requested revision.
1692 Returns a list with decompressed data for each requested revision.
1692 """
1693 """
1693 if not revs:
1694 if not revs:
1694 return []
1695 return []
1695 start = self.start
1696 start = self.start
1696 length = self.length
1697 length = self.length
1697 inline = self._inline
1698 inline = self._inline
1698 iosize = self.index.entry_size
1699 iosize = self.index.entry_size
1699 buffer = util.buffer
1700 buffer = util.buffer
1700
1701
1701 l = []
1702 l = []
1702 ladd = l.append
1703 ladd = l.append
1703
1704
1704 if not self._withsparseread:
1705 if not self._withsparseread:
1705 slicedchunks = (revs,)
1706 slicedchunks = (revs,)
1706 else:
1707 else:
1707 slicedchunks = deltautil.slicechunk(
1708 slicedchunks = deltautil.slicechunk(
1708 self, revs, targetsize=targetsize
1709 self, revs, targetsize=targetsize
1709 )
1710 )
1710
1711
1711 for revschunk in slicedchunks:
1712 for revschunk in slicedchunks:
1712 firstrev = revschunk[0]
1713 firstrev = revschunk[0]
1713 # Skip trailing revisions with empty diff
1714 # Skip trailing revisions with empty diff
1714 for lastrev in revschunk[::-1]:
1715 for lastrev in revschunk[::-1]:
1715 if length(lastrev) != 0:
1716 if length(lastrev) != 0:
1716 break
1717 break
1717
1718
1718 try:
1719 try:
1719 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1720 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1720 except OverflowError:
1721 except OverflowError:
1721 # issue4215 - we can't cache a run of chunks greater than
1722 # issue4215 - we can't cache a run of chunks greater than
1722 # 2G on Windows
1723 # 2G on Windows
1723 return [self._chunk(rev, df=df) for rev in revschunk]
1724 return [self._chunk(rev, df=df) for rev in revschunk]
1724
1725
1725 decomp = self.decompress
1726 decomp = self.decompress
1726 # self._decompressor might be None, but will not be used in that case
1727 # self._decompressor might be None, but will not be used in that case
1727 def_decomp = self._decompressor
1728 def_decomp = self._decompressor
1728 for rev in revschunk:
1729 for rev in revschunk:
1729 chunkstart = start(rev)
1730 chunkstart = start(rev)
1730 if inline:
1731 if inline:
1731 chunkstart += (rev + 1) * iosize
1732 chunkstart += (rev + 1) * iosize
1732 chunklength = length(rev)
1733 chunklength = length(rev)
1733 comp_mode = self.index[rev][10]
1734 comp_mode = self.index[rev][10]
1734 c = buffer(data, chunkstart - offset, chunklength)
1735 c = buffer(data, chunkstart - offset, chunklength)
1735 if comp_mode == COMP_MODE_PLAIN:
1736 if comp_mode == COMP_MODE_PLAIN:
1736 ladd(c)
1737 ladd(c)
1737 elif comp_mode == COMP_MODE_INLINE:
1738 elif comp_mode == COMP_MODE_INLINE:
1738 ladd(decomp(c))
1739 ladd(decomp(c))
1739 elif comp_mode == COMP_MODE_DEFAULT:
1740 elif comp_mode == COMP_MODE_DEFAULT:
1740 ladd(def_decomp(c))
1741 ladd(def_decomp(c))
1741 else:
1742 else:
1742 msg = b'unknown compression mode %d'
1743 msg = b'unknown compression mode %d'
1743 msg %= comp_mode
1744 msg %= comp_mode
1744 raise error.RevlogError(msg)
1745 raise error.RevlogError(msg)
1745
1746
1746 return l
1747 return l
1747
1748
1748 def deltaparent(self, rev):
1749 def deltaparent(self, rev):
1749 """return deltaparent of the given revision"""
1750 """return deltaparent of the given revision"""
1750 base = self.index[rev][3]
1751 base = self.index[rev][3]
1751 if base == rev:
1752 if base == rev:
1752 return nullrev
1753 return nullrev
1753 elif self._generaldelta:
1754 elif self._generaldelta:
1754 return base
1755 return base
1755 else:
1756 else:
1756 return rev - 1
1757 return rev - 1
1757
1758
1758 def issnapshot(self, rev):
1759 def issnapshot(self, rev):
1759 """tells whether rev is a snapshot"""
1760 """tells whether rev is a snapshot"""
1760 if not self._sparserevlog:
1761 if not self._sparserevlog:
1761 return self.deltaparent(rev) == nullrev
1762 return self.deltaparent(rev) == nullrev
1762 elif util.safehasattr(self.index, b'issnapshot'):
1763 elif util.safehasattr(self.index, b'issnapshot'):
1763 # directly assign the method to cache the testing and access
1764 # directly assign the method to cache the testing and access
1764 self.issnapshot = self.index.issnapshot
1765 self.issnapshot = self.index.issnapshot
1765 return self.issnapshot(rev)
1766 return self.issnapshot(rev)
1766 if rev == nullrev:
1767 if rev == nullrev:
1767 return True
1768 return True
1768 entry = self.index[rev]
1769 entry = self.index[rev]
1769 base = entry[3]
1770 base = entry[3]
1770 if base == rev:
1771 if base == rev:
1771 return True
1772 return True
1772 if base == nullrev:
1773 if base == nullrev:
1773 return True
1774 return True
1774 p1 = entry[5]
1775 p1 = entry[5]
1775 p2 = entry[6]
1776 p2 = entry[6]
1776 if base == p1 or base == p2:
1777 if base == p1 or base == p2:
1777 return False
1778 return False
1778 return self.issnapshot(base)
1779 return self.issnapshot(base)
1779
1780
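# ---- editor's sketch -------------------------------------------------------
# The pure-Python path above calls a revision a snapshot when its delta
# base is itself or null, treats a delta against either parent as a plain
# delta, and otherwise recurses: with sparse-revlog, an intermediate
# snapshot is a delta taken against another snapshot. Condensed
# (hypothetical standalone code):

def toy_issnapshot(bases, parents, rev):
    if rev == -1:
        return True
    base = bases[rev]
    if base == rev or base == -1:
        return True                    # full snapshot
    if base in parents[rev]:
        return False                   # ordinary delta against a parent
    return toy_issnapshot(bases, parents, base)

# bases = [0, 0, 1]; parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}:
# rev 2 deltas against rev 1, which is not one of its parents, so it is a
# snapshot only if rev 1 is -- and rev 1 is a parent delta, so: False.
# ----------------------------------------------------------------------------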
1780 def snapshotdepth(self, rev):
1781 def snapshotdepth(self, rev):
1781 """number of snapshot in the chain before this one"""
1782 """number of snapshot in the chain before this one"""
1782 if not self.issnapshot(rev):
1783 if not self.issnapshot(rev):
1783 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1784 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1784 return len(self._deltachain(rev)[0]) - 1
1785 return len(self._deltachain(rev)[0]) - 1
1785
1786
1786 def revdiff(self, rev1, rev2):
1787 def revdiff(self, rev1, rev2):
1787 """return or calculate a delta between two revisions
1788 """return or calculate a delta between two revisions
1788
1789
1789 The delta calculated is in binary form and is intended to be written to
1790 The delta calculated is in binary form and is intended to be written to
1790 revlog data directly. So this function needs raw revision data.
1791 revlog data directly. So this function needs raw revision data.
1791 """
1792 """
1792 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1793 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1793 return bytes(self._chunk(rev2))
1794 return bytes(self._chunk(rev2))
1794
1795
1795 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1796 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1796
1797
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

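    # Reconstruction sketch (informal, mirrors `_rawtext` above): given a
    # delta chain [b, m, r] where b is the full base text, the raw text of
    # r is obtained by applying the stored deltas in order, roughly:
    #
    #   rawtext = mdiff.patches(basetext, [delta_m, delta_r])
    #
    # and when the cached revision sits inside the chain, the chain walk
    # stops there and patching starts from the cached text instead of the
    # chain's base.
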
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

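    # Offset arithmetic example (illustrative numbers only): in an inline
    # revlog the index entries and data share one file, so the logical
    # sidedata offset is shifted by the index entries that precede it.
    # With entry_size == 64 and rev == 2, the stored offset is moved by
    # 64 * (1 + 2) == 192 bytes before reading.
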
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

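    # Conceptual sketch of the default hash (assuming the SHA-1 semantics
    # of storageutil.hashrevisionsha1): the parents are sorted first, so
    # the node is symmetric in p1 and p2. The helper name below is
    # hypothetical; the real implementation may take shortcuts:
    #
    #   import hashlib
    #   def node_sketch(text, p1, p2):
    #       a, b = sorted([p1, p2])
    #       return hashlib.sha1(a + b + text).digest()
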
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

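    # Hypothetical usage sketch: verify a reconstructed text against its
    # recorded node; a mismatch raises RevlogError (or CensoredNodeError
    # for censored content).
    #
    #   text = rl.revision(node)
    #   rl.checkhash(text, node)
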
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = None
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if (
                        trindex is None
                        and troffset
                        <= self.start(r) + r * self.index.entry_size
                    ):
                        trindex = r
                new_dfh.flush()

            if trindex is None:
                trindex = 0

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

                tr.replace(self._indexfile, trindex * self.index.entry_size)
                nodemaputil.setup_persistent_nodemap(tr, self)
                self._segmentfile = randomaccessfile.randomaccessfile(
                    self.opener,
                    self._datafile,
                    self._chunkcachesize,
                )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

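    # Illustrative note (a sketch of the trigger, with _maxinline taken
    # from this module's defaults): once the revision data kept inline
    # grows to _maxinline bytes or more, the method above migrates the
    # revlog to a separate index (.i) plus data (.d) layout inside the
    # current transaction.
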
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last, to avoid exposing references
                # to potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

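    # Minimal usage sketch (hypothetical; mirrors what addrawrevision does
    # below): every write has to happen inside this context manager so the
    # index/data/sidedata handles exist.
    #
    #   with rl._writing(transaction):
    #       rl._addrevision(node, rawtext, transaction, link, p1, p2,
    #                       flags, cachedelta)
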
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data.

        Returns a 2-tuple of (header, data): the header is either empty or
        b'u' (stored uncompressed), and any engine-specific header is carried
        inside the data itself.
        """
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

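    # Worked example of the convention above (a sketch; assumes the default
    # zlib engine, whose output starts with b'x'):
    #
    #   compress(b'')          -> (b'', b'')          # empty passes through
    #   compress(big_text)     -> (b'', b'x\x9c...')  # engine header included
    #   compress(random_bytes) -> (b'u', random_bytes)  # not worth compressing
    #   compress(b'\0' + raw)  -> (b'', b'\0' + raw)  # when incompressible,
    #                                                 # the b'\0' prefix is
    #                                                 # already unambiguous
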
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

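    # Round-trip sketch (hypothetical caller): `decompress` undoes
    # `compress`, once the header and payload are joined the way the
    # storage layer writes them.
    #
    #   header, stored = rl.compress(rawchunk)
    #   assert bytes(rl.decompress(header + stored)) == rawchunk
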
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._format_version == CHANGELOGV2:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

2738 def iscensored(self, rev):
2739 def iscensored(self, rev):
2739 """Check if a file revision is censored."""
2740 """Check if a file revision is censored."""
2740 if not self._censorable:
2741 if not self._censorable:
2741 return False
2742 return False
2742
2743
2743 return self.flags(rev) & REVIDX_ISCENSORED
2744 return self.flags(rev) & REVIDX_ISCENSORED
2744
2745
2745 def _peek_iscensored(self, baserev, delta):
2746 def _peek_iscensored(self, baserev, delta):
2746 """Quickly check if a delta produces a censored revision."""
2747 """Quickly check if a delta produces a censored revision."""
2747 if not self._censorable:
2748 if not self._censorable:
2748 return False
2749 return False
2749
2750
2750 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2751 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
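
    # Note: the "peek" above avoids materializing the new revision text;
    # storageutil.deltaiscensored() inspects the raw delta itself to decide
    # whether applying it to the base would yield a censorship tombstone.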

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)
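        # For an inline revlog, index entries and data chunks share a single
        # interleaved file, so the truncation point is the surviving data
        # plus the surviving index entries; otherwise the two files are
        # truncated independently.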

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

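        # Interpretation: a positive di usually means a partially written
        # trailing index entry (e.g. after an interrupted write); a positive
        # dd means the data file holds bytes the index does not account for.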
        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If it is None, the destination revlog's existing setting
        is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

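    # Illustrative use (not from this file): an upgrade pass that keeps
    # compatible deltas might run, under an open transaction `tr`:
    #
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
    #
    # while passing DELTAREUSENEVER instead forces every delta to be
    # recomputed by the destination's delta computer.
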
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
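                    # new_flags is a (flags to add, flags to remove) pair
                    # returned by the helpers; fold both into the entry flags.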
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
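                # Mode recap: COMP_MODE_PLAIN stores the bytes uncompressed;
                # COMP_MODE_DEFAULT stores them compressed with the docket's
                # default engine (no per-chunk header needed); COMP_MODE_INLINE
                # keeps a per-chunk compression header within the data itself.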
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)