##// END OF EJS Templates
revlog: make _partialmatch fail fast on almost-hex inputs...
Arseniy Alekseyev -
r50310:5fe7e9ed default
parent child Browse files
Show More
@@ -1,3330 +1,3337 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import zlib
22 import zlib
23
23
24 # import stuff from node for others to import from revlog
24 # import stuff from node for others to import from revlog
25 from .node import (
25 from .node import (
26 bin,
26 bin,
27 hex,
27 hex,
28 nullrev,
28 nullrev,
29 sha1nodeconstants,
29 sha1nodeconstants,
30 short,
30 short,
31 wdirrev,
31 wdirrev,
32 )
32 )
33 from .i18n import _
33 from .i18n import _
34 from .pycompat import getattr
34 from .pycompat import getattr
35 from .revlogutils.constants import (
35 from .revlogutils.constants import (
36 ALL_KINDS,
36 ALL_KINDS,
37 CHANGELOGV2,
37 CHANGELOGV2,
38 COMP_MODE_DEFAULT,
38 COMP_MODE_DEFAULT,
39 COMP_MODE_INLINE,
39 COMP_MODE_INLINE,
40 COMP_MODE_PLAIN,
40 COMP_MODE_PLAIN,
41 ENTRY_RANK,
41 ENTRY_RANK,
42 FEATURES_BY_VERSION,
42 FEATURES_BY_VERSION,
43 FLAG_GENERALDELTA,
43 FLAG_GENERALDELTA,
44 FLAG_INLINE_DATA,
44 FLAG_INLINE_DATA,
45 INDEX_HEADER,
45 INDEX_HEADER,
46 KIND_CHANGELOG,
46 KIND_CHANGELOG,
47 RANK_UNKNOWN,
47 RANK_UNKNOWN,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
56 SUPPORTED_FLAGS,
57 )
57 )
58 from .revlogutils.flagutil import (
58 from .revlogutils.flagutil import (
59 REVIDX_DEFAULT_FLAGS,
59 REVIDX_DEFAULT_FLAGS,
60 REVIDX_ELLIPSIS,
60 REVIDX_ELLIPSIS,
61 REVIDX_EXTSTORED,
61 REVIDX_EXTSTORED,
62 REVIDX_FLAGS_ORDER,
62 REVIDX_FLAGS_ORDER,
63 REVIDX_HASCOPIESINFO,
63 REVIDX_HASCOPIESINFO,
64 REVIDX_ISCENSORED,
64 REVIDX_ISCENSORED,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 )
66 )
67 from .thirdparty import attr
67 from .thirdparty import attr
68 from . import (
68 from . import (
69 ancestor,
69 ancestor,
70 dagop,
70 dagop,
71 error,
71 error,
72 mdiff,
72 mdiff,
73 policy,
73 policy,
74 pycompat,
74 pycompat,
75 revlogutils,
75 revlogutils,
76 templatefilters,
76 templatefilters,
77 util,
77 util,
78 )
78 )
79 from .interfaces import (
79 from .interfaces import (
80 repository,
80 repository,
81 util as interfaceutil,
81 util as interfaceutil,
82 )
82 )
83 from .revlogutils import (
83 from .revlogutils import (
84 deltas as deltautil,
84 deltas as deltautil,
85 docket as docketutil,
85 docket as docketutil,
86 flagutil,
86 flagutil,
87 nodemap as nodemaputil,
87 nodemap as nodemaputil,
88 randomaccessfile,
88 randomaccessfile,
89 revlogv0,
89 revlogv0,
90 rewrite,
90 rewrite,
91 sidedata as sidedatautil,
91 sidedata as sidedatautil,
92 )
92 )
93 from .utils import (
93 from .utils import (
94 storageutil,
94 storageutil,
95 stringutil,
95 stringutil,
96 )
96 )
97
97
98 # blanked usage of all the name to prevent pyflakes constraints
98 # blanked usage of all the name to prevent pyflakes constraints
99 # We need these name available in the module for extensions.
99 # We need these name available in the module for extensions.
100
100
101 REVLOGV0
101 REVLOGV0
102 REVLOGV1
102 REVLOGV1
103 REVLOGV2
103 REVLOGV2
104 CHANGELOGV2
104 CHANGELOGV2
105 FLAG_INLINE_DATA
105 FLAG_INLINE_DATA
106 FLAG_GENERALDELTA
106 FLAG_GENERALDELTA
107 REVLOG_DEFAULT_FLAGS
107 REVLOG_DEFAULT_FLAGS
108 REVLOG_DEFAULT_FORMAT
108 REVLOG_DEFAULT_FORMAT
109 REVLOG_DEFAULT_VERSION
109 REVLOG_DEFAULT_VERSION
110 REVLOGV1_FLAGS
110 REVLOGV1_FLAGS
111 REVLOGV2_FLAGS
111 REVLOGV2_FLAGS
112 REVIDX_ISCENSORED
112 REVIDX_ISCENSORED
113 REVIDX_ELLIPSIS
113 REVIDX_ELLIPSIS
114 REVIDX_HASCOPIESINFO
114 REVIDX_HASCOPIESINFO
115 REVIDX_EXTSTORED
115 REVIDX_EXTSTORED
116 REVIDX_DEFAULT_FLAGS
116 REVIDX_DEFAULT_FLAGS
117 REVIDX_FLAGS_ORDER
117 REVIDX_FLAGS_ORDER
118 REVIDX_RAWTEXT_CHANGING_FLAGS
118 REVIDX_RAWTEXT_CHANGING_FLAGS
119
119
120 parsers = policy.importmod('parsers')
120 parsers = policy.importmod('parsers')
121 rustancestor = policy.importrust('ancestor')
121 rustancestor = policy.importrust('ancestor')
122 rustdagop = policy.importrust('dagop')
122 rustdagop = policy.importrust('dagop')
123 rustrevlog = policy.importrust('revlog')
123 rustrevlog = policy.importrust('revlog')
124
124
125 # Aliased for performance.
125 # Aliased for performance.
126 _zlibdecompress = zlib.decompress
126 _zlibdecompress = zlib.decompress
127
127
128 # max size of revlog with inline data
128 # max size of revlog with inline data
129 _maxinline = 131072
129 _maxinline = 131072
130
130
131 # Flag processors for REVIDX_ELLIPSIS.
131 # Flag processors for REVIDX_ELLIPSIS.
132 def ellipsisreadprocessor(rl, text):
132 def ellipsisreadprocessor(rl, text):
133 return text, False
133 return text, False
134
134
135
135
136 def ellipsiswriteprocessor(rl, text):
136 def ellipsiswriteprocessor(rl, text):
137 return text, False
137 return text, False
138
138
139
139
140 def ellipsisrawprocessor(rl, text):
140 def ellipsisrawprocessor(rl, text):
141 return False
141 return False
142
142
143
143
144 ellipsisprocessor = (
144 ellipsisprocessor = (
145 ellipsisreadprocessor,
145 ellipsisreadprocessor,
146 ellipsiswriteprocessor,
146 ellipsiswriteprocessor,
147 ellipsisrawprocessor,
147 ellipsisrawprocessor,
148 )
148 )
149
149
150
150
151 def _verify_revision(rl, skipflags, state, node):
151 def _verify_revision(rl, skipflags, state, node):
152 """Verify the integrity of the given revlog ``node`` while providing a hook
152 """Verify the integrity of the given revlog ``node`` while providing a hook
153 point for extensions to influence the operation."""
153 point for extensions to influence the operation."""
154 if skipflags:
154 if skipflags:
155 state[b'skipread'].add(node)
155 state[b'skipread'].add(node)
156 else:
156 else:
157 # Side-effect: read content and verify hash.
157 # Side-effect: read content and verify hash.
158 rl.revision(node)
158 rl.revision(node)
159
159
160
160
161 # True if a fast implementation for persistent-nodemap is available
161 # True if a fast implementation for persistent-nodemap is available
162 #
162 #
163 # We also consider we have a "fast" implementation in "pure" python because
163 # We also consider we have a "fast" implementation in "pure" python because
164 # people using pure don't really have performance consideration (and a
164 # people using pure don't really have performance consideration (and a
165 # wheelbarrow of other slowness source)
165 # wheelbarrow of other slowness source)
166 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
167 parsers, 'BaseIndexObject'
167 parsers, 'BaseIndexObject'
168 )
168 )
169
169
170
170
171 @interfaceutil.implementer(repository.irevisiondelta)
171 @interfaceutil.implementer(repository.irevisiondelta)
172 @attr.s(slots=True)
172 @attr.s(slots=True)
173 class revlogrevisiondelta:
173 class revlogrevisiondelta:
174 node = attr.ib()
174 node = attr.ib()
175 p1node = attr.ib()
175 p1node = attr.ib()
176 p2node = attr.ib()
176 p2node = attr.ib()
177 basenode = attr.ib()
177 basenode = attr.ib()
178 flags = attr.ib()
178 flags = attr.ib()
179 baserevisionsize = attr.ib()
179 baserevisionsize = attr.ib()
180 revision = attr.ib()
180 revision = attr.ib()
181 delta = attr.ib()
181 delta = attr.ib()
182 sidedata = attr.ib()
182 sidedata = attr.ib()
183 protocol_flags = attr.ib()
183 protocol_flags = attr.ib()
184 linknode = attr.ib(default=None)
184 linknode = attr.ib(default=None)
185
185
186
186
187 @interfaceutil.implementer(repository.iverifyproblem)
187 @interfaceutil.implementer(repository.iverifyproblem)
188 @attr.s(frozen=True)
188 @attr.s(frozen=True)
189 class revlogproblem:
189 class revlogproblem:
190 warning = attr.ib(default=None)
190 warning = attr.ib(default=None)
191 error = attr.ib(default=None)
191 error = attr.ib(default=None)
192 node = attr.ib(default=None)
192 node = attr.ib(default=None)
193
193
194
194
195 def parse_index_v1(data, inline):
195 def parse_index_v1(data, inline):
196 # call the C implementation to parse the index data
196 # call the C implementation to parse the index data
197 index, cache = parsers.parse_index2(data, inline)
197 index, cache = parsers.parse_index2(data, inline)
198 return index, cache
198 return index, cache
199
199
200
200
201 def parse_index_v2(data, inline):
201 def parse_index_v2(data, inline):
202 # call the C implementation to parse the index data
202 # call the C implementation to parse the index data
203 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
203 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
204 return index, cache
204 return index, cache
205
205
206
206
207 def parse_index_cl_v2(data, inline):
207 def parse_index_cl_v2(data, inline):
208 # call the C implementation to parse the index data
208 # call the C implementation to parse the index data
209 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
209 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
210 return index, cache
210 return index, cache
211
211
212
212
213 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
213 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
214
214
215 def parse_index_v1_nodemap(data, inline):
215 def parse_index_v1_nodemap(data, inline):
216 index, cache = parsers.parse_index_devel_nodemap(data, inline)
216 index, cache = parsers.parse_index_devel_nodemap(data, inline)
217 return index, cache
217 return index, cache
218
218
219
219
220 else:
220 else:
221 parse_index_v1_nodemap = None
221 parse_index_v1_nodemap = None
222
222
223
223
224 def parse_index_v1_mixed(data, inline):
224 def parse_index_v1_mixed(data, inline):
225 index, cache = parse_index_v1(data, inline)
225 index, cache = parse_index_v1(data, inline)
226 return rustrevlog.MixedIndex(index), cache
226 return rustrevlog.MixedIndex(index), cache
227
227
228
228
229 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
229 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
230 # signed integer)
230 # signed integer)
231 _maxentrysize = 0x7FFFFFFF
231 _maxentrysize = 0x7FFFFFFF
232
232
233 FILE_TOO_SHORT_MSG = _(
233 FILE_TOO_SHORT_MSG = _(
234 b'cannot read from revlog %s;'
234 b'cannot read from revlog %s;'
235 b' expected %d bytes from offset %d, data size is %d'
235 b' expected %d bytes from offset %d, data size is %d'
236 )
236 )
237
237
238 hexdigits = b'0123456789abcdefABCDEF'
239
238
240
239 class revlog:
241 class revlog:
240 """
242 """
241 the underlying revision storage object
243 the underlying revision storage object
242
244
243 A revlog consists of two parts, an index and the revision data.
245 A revlog consists of two parts, an index and the revision data.
244
246
245 The index is a file with a fixed record size containing
247 The index is a file with a fixed record size containing
246 information on each revision, including its nodeid (hash), the
248 information on each revision, including its nodeid (hash), the
247 nodeids of its parents, the position and offset of its data within
249 nodeids of its parents, the position and offset of its data within
248 the data file, and the revision it's based on. Finally, each entry
250 the data file, and the revision it's based on. Finally, each entry
249 contains a linkrev entry that can serve as a pointer to external
251 contains a linkrev entry that can serve as a pointer to external
250 data.
252 data.
251
253
252 The revision data itself is a linear collection of data chunks.
254 The revision data itself is a linear collection of data chunks.
253 Each chunk represents a revision and is usually represented as a
255 Each chunk represents a revision and is usually represented as a
254 delta against the previous chunk. To bound lookup time, runs of
256 delta against the previous chunk. To bound lookup time, runs of
255 deltas are limited to about 2 times the length of the original
257 deltas are limited to about 2 times the length of the original
256 version data. This makes retrieval of a version proportional to
258 version data. This makes retrieval of a version proportional to
257 its size, or O(1) relative to the number of revisions.
259 its size, or O(1) relative to the number of revisions.
258
260
259 Both pieces of the revlog are written to in an append-only
261 Both pieces of the revlog are written to in an append-only
260 fashion, which means we never need to rewrite a file to insert or
262 fashion, which means we never need to rewrite a file to insert or
261 remove data, and can use some simple techniques to avoid the need
263 remove data, and can use some simple techniques to avoid the need
262 for locking while reading.
264 for locking while reading.
263
265
264 If checkambig, indexfile is opened with checkambig=True at
266 If checkambig, indexfile is opened with checkambig=True at
265 writing, to avoid file stat ambiguity.
267 writing, to avoid file stat ambiguity.
266
268
267 If mmaplargeindex is True, and an mmapindexthreshold is set, the
269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
268 index will be mmapped rather than read if it is larger than the
270 index will be mmapped rather than read if it is larger than the
269 configured threshold.
271 configured threshold.
270
272
271 If censorable is True, the revlog can have censored revisions.
273 If censorable is True, the revlog can have censored revisions.
272
274
273 If `upperboundcomp` is not None, this is the expected maximal gain from
275 If `upperboundcomp` is not None, this is the expected maximal gain from
274 compression for the data content.
276 compression for the data content.
275
277
276 `concurrencychecker` is an optional function that receives 3 arguments: a
278 `concurrencychecker` is an optional function that receives 3 arguments: a
277 file handle, a filename, and an expected position. It should check whether
279 file handle, a filename, and an expected position. It should check whether
278 the current position in the file handle is valid, and log/warn/fail (by
280 the current position in the file handle is valid, and log/warn/fail (by
279 raising).
281 raising).
280
282
281 See mercurial/revlogutils/contants.py for details about the content of an
283 See mercurial/revlogutils/contants.py for details about the content of an
282 index entry.
284 index entry.
283 """
285 """
284
286
285 _flagserrorclass = error.RevlogError
287 _flagserrorclass = error.RevlogError
286
288
287 def __init__(
289 def __init__(
288 self,
290 self,
289 opener,
291 opener,
290 target,
292 target,
291 radix,
293 radix,
292 postfix=None, # only exist for `tmpcensored` now
294 postfix=None, # only exist for `tmpcensored` now
293 checkambig=False,
295 checkambig=False,
294 mmaplargeindex=False,
296 mmaplargeindex=False,
295 censorable=False,
297 censorable=False,
296 upperboundcomp=None,
298 upperboundcomp=None,
297 persistentnodemap=False,
299 persistentnodemap=False,
298 concurrencychecker=None,
300 concurrencychecker=None,
299 trypending=False,
301 trypending=False,
300 canonical_parent_order=True,
302 canonical_parent_order=True,
301 ):
303 ):
302 """
304 """
303 create a revlog object
305 create a revlog object
304
306
305 opener is a function that abstracts the file opening operation
307 opener is a function that abstracts the file opening operation
306 and can be used to implement COW semantics or the like.
308 and can be used to implement COW semantics or the like.
307
309
308 `target`: a (KIND, ID) tuple that identify the content stored in
310 `target`: a (KIND, ID) tuple that identify the content stored in
309 this revlog. It help the rest of the code to understand what the revlog
311 this revlog. It help the rest of the code to understand what the revlog
310 is about without having to resort to heuristic and index filename
312 is about without having to resort to heuristic and index filename
311 analysis. Note: that this must be reliably be set by normal code, but
313 analysis. Note: that this must be reliably be set by normal code, but
312 that test, debug, or performance measurement code might not set this to
314 that test, debug, or performance measurement code might not set this to
313 accurate value.
315 accurate value.
314 """
316 """
315 self.upperboundcomp = upperboundcomp
317 self.upperboundcomp = upperboundcomp
316
318
317 self.radix = radix
319 self.radix = radix
318
320
319 self._docket_file = None
321 self._docket_file = None
320 self._indexfile = None
322 self._indexfile = None
321 self._datafile = None
323 self._datafile = None
322 self._sidedatafile = None
324 self._sidedatafile = None
323 self._nodemap_file = None
325 self._nodemap_file = None
324 self.postfix = postfix
326 self.postfix = postfix
325 self._trypending = trypending
327 self._trypending = trypending
326 self.opener = opener
328 self.opener = opener
327 if persistentnodemap:
329 if persistentnodemap:
328 self._nodemap_file = nodemaputil.get_nodemap_file(self)
330 self._nodemap_file = nodemaputil.get_nodemap_file(self)
329
331
330 assert target[0] in ALL_KINDS
332 assert target[0] in ALL_KINDS
331 assert len(target) == 2
333 assert len(target) == 2
332 self.target = target
334 self.target = target
333 # When True, indexfile is opened with checkambig=True at writing, to
335 # When True, indexfile is opened with checkambig=True at writing, to
334 # avoid file stat ambiguity.
336 # avoid file stat ambiguity.
335 self._checkambig = checkambig
337 self._checkambig = checkambig
336 self._mmaplargeindex = mmaplargeindex
338 self._mmaplargeindex = mmaplargeindex
337 self._censorable = censorable
339 self._censorable = censorable
338 # 3-tuple of (node, rev, text) for a raw revision.
340 # 3-tuple of (node, rev, text) for a raw revision.
339 self._revisioncache = None
341 self._revisioncache = None
340 # Maps rev to chain base rev.
342 # Maps rev to chain base rev.
341 self._chainbasecache = util.lrucachedict(100)
343 self._chainbasecache = util.lrucachedict(100)
342 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
344 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
343 self._chunkcache = (0, b'')
345 self._chunkcache = (0, b'')
344 # How much data to read and cache into the raw revlog data cache.
346 # How much data to read and cache into the raw revlog data cache.
345 self._chunkcachesize = 65536
347 self._chunkcachesize = 65536
346 self._maxchainlen = None
348 self._maxchainlen = None
347 self._deltabothparents = True
349 self._deltabothparents = True
348 self._debug_delta = False
350 self._debug_delta = False
349 self.index = None
351 self.index = None
350 self._docket = None
352 self._docket = None
351 self._nodemap_docket = None
353 self._nodemap_docket = None
352 # Mapping of partial identifiers to full nodes.
354 # Mapping of partial identifiers to full nodes.
353 self._pcache = {}
355 self._pcache = {}
354 # Mapping of revision integer to full node.
356 # Mapping of revision integer to full node.
355 self._compengine = b'zlib'
357 self._compengine = b'zlib'
356 self._compengineopts = {}
358 self._compengineopts = {}
357 self._maxdeltachainspan = -1
359 self._maxdeltachainspan = -1
358 self._withsparseread = False
360 self._withsparseread = False
359 self._sparserevlog = False
361 self._sparserevlog = False
360 self.hassidedata = False
362 self.hassidedata = False
361 self._srdensitythreshold = 0.50
363 self._srdensitythreshold = 0.50
362 self._srmingapsize = 262144
364 self._srmingapsize = 262144
363
365
364 # Make copy of flag processors so each revlog instance can support
366 # Make copy of flag processors so each revlog instance can support
365 # custom flags.
367 # custom flags.
366 self._flagprocessors = dict(flagutil.flagprocessors)
368 self._flagprocessors = dict(flagutil.flagprocessors)
367
369
368 # 3-tuple of file handles being used for active writing.
370 # 3-tuple of file handles being used for active writing.
369 self._writinghandles = None
371 self._writinghandles = None
370 # prevent nesting of addgroup
372 # prevent nesting of addgroup
371 self._adding_group = None
373 self._adding_group = None
372
374
373 self._loadindex()
375 self._loadindex()
374
376
375 self._concurrencychecker = concurrencychecker
377 self._concurrencychecker = concurrencychecker
376
378
377 # parent order is supposed to be semantically irrelevant, so we
379 # parent order is supposed to be semantically irrelevant, so we
378 # normally resort parents to ensure that the first parent is non-null,
380 # normally resort parents to ensure that the first parent is non-null,
379 # if there is a non-null parent at all.
381 # if there is a non-null parent at all.
380 # filelog abuses the parent order as flag to mark some instances of
382 # filelog abuses the parent order as flag to mark some instances of
381 # meta-encoded files, so allow it to disable this behavior.
383 # meta-encoded files, so allow it to disable this behavior.
382 self.canonical_parent_order = canonical_parent_order
384 self.canonical_parent_order = canonical_parent_order
383
385
384 def _init_opts(self):
386 def _init_opts(self):
385 """process options (from above/config) to setup associated default revlog mode
387 """process options (from above/config) to setup associated default revlog mode
386
388
387 These values might be affected when actually reading on disk information.
389 These values might be affected when actually reading on disk information.
388
390
389 The relevant values are returned for use in _loadindex().
391 The relevant values are returned for use in _loadindex().
390
392
391 * newversionflags:
393 * newversionflags:
392 version header to use if we need to create a new revlog
394 version header to use if we need to create a new revlog
393
395
394 * mmapindexthreshold:
396 * mmapindexthreshold:
395 minimal index size for start to use mmap
397 minimal index size for start to use mmap
396
398
397 * force_nodemap:
399 * force_nodemap:
398 force the usage of a "development" version of the nodemap code
400 force the usage of a "development" version of the nodemap code
399 """
401 """
400 mmapindexthreshold = None
402 mmapindexthreshold = None
401 opts = self.opener.options
403 opts = self.opener.options
402
404
403 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
405 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
404 new_header = CHANGELOGV2
406 new_header = CHANGELOGV2
405 elif b'revlogv2' in opts:
407 elif b'revlogv2' in opts:
406 new_header = REVLOGV2
408 new_header = REVLOGV2
407 elif b'revlogv1' in opts:
409 elif b'revlogv1' in opts:
408 new_header = REVLOGV1 | FLAG_INLINE_DATA
410 new_header = REVLOGV1 | FLAG_INLINE_DATA
409 if b'generaldelta' in opts:
411 if b'generaldelta' in opts:
410 new_header |= FLAG_GENERALDELTA
412 new_header |= FLAG_GENERALDELTA
411 elif b'revlogv0' in self.opener.options:
413 elif b'revlogv0' in self.opener.options:
412 new_header = REVLOGV0
414 new_header = REVLOGV0
413 else:
415 else:
414 new_header = REVLOG_DEFAULT_VERSION
416 new_header = REVLOG_DEFAULT_VERSION
415
417
416 if b'chunkcachesize' in opts:
418 if b'chunkcachesize' in opts:
417 self._chunkcachesize = opts[b'chunkcachesize']
419 self._chunkcachesize = opts[b'chunkcachesize']
418 if b'maxchainlen' in opts:
420 if b'maxchainlen' in opts:
419 self._maxchainlen = opts[b'maxchainlen']
421 self._maxchainlen = opts[b'maxchainlen']
420 if b'deltabothparents' in opts:
422 if b'deltabothparents' in opts:
421 self._deltabothparents = opts[b'deltabothparents']
423 self._deltabothparents = opts[b'deltabothparents']
422 self._lazydelta = bool(opts.get(b'lazydelta', True))
424 self._lazydelta = bool(opts.get(b'lazydelta', True))
423 self._lazydeltabase = False
425 self._lazydeltabase = False
424 if self._lazydelta:
426 if self._lazydelta:
425 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
427 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
426 if b'debug-delta' in opts:
428 if b'debug-delta' in opts:
427 self._debug_delta = opts[b'debug-delta']
429 self._debug_delta = opts[b'debug-delta']
428 if b'compengine' in opts:
430 if b'compengine' in opts:
429 self._compengine = opts[b'compengine']
431 self._compengine = opts[b'compengine']
430 if b'zlib.level' in opts:
432 if b'zlib.level' in opts:
431 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
433 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
432 if b'zstd.level' in opts:
434 if b'zstd.level' in opts:
433 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
435 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
434 if b'maxdeltachainspan' in opts:
436 if b'maxdeltachainspan' in opts:
435 self._maxdeltachainspan = opts[b'maxdeltachainspan']
437 self._maxdeltachainspan = opts[b'maxdeltachainspan']
436 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
438 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
437 mmapindexthreshold = opts[b'mmapindexthreshold']
439 mmapindexthreshold = opts[b'mmapindexthreshold']
438 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
440 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
439 withsparseread = bool(opts.get(b'with-sparse-read', False))
441 withsparseread = bool(opts.get(b'with-sparse-read', False))
440 # sparse-revlog forces sparse-read
442 # sparse-revlog forces sparse-read
441 self._withsparseread = self._sparserevlog or withsparseread
443 self._withsparseread = self._sparserevlog or withsparseread
442 if b'sparse-read-density-threshold' in opts:
444 if b'sparse-read-density-threshold' in opts:
443 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
445 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
444 if b'sparse-read-min-gap-size' in opts:
446 if b'sparse-read-min-gap-size' in opts:
445 self._srmingapsize = opts[b'sparse-read-min-gap-size']
447 self._srmingapsize = opts[b'sparse-read-min-gap-size']
446 if opts.get(b'enableellipsis'):
448 if opts.get(b'enableellipsis'):
447 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
449 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
448
450
449 # revlog v0 doesn't have flag processors
451 # revlog v0 doesn't have flag processors
450 for flag, processor in opts.get(b'flagprocessors', {}).items():
452 for flag, processor in opts.get(b'flagprocessors', {}).items():
451 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
453 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
452
454
453 if self._chunkcachesize <= 0:
455 if self._chunkcachesize <= 0:
454 raise error.RevlogError(
456 raise error.RevlogError(
455 _(b'revlog chunk cache size %r is not greater than 0')
457 _(b'revlog chunk cache size %r is not greater than 0')
456 % self._chunkcachesize
458 % self._chunkcachesize
457 )
459 )
458 elif self._chunkcachesize & (self._chunkcachesize - 1):
460 elif self._chunkcachesize & (self._chunkcachesize - 1):
459 raise error.RevlogError(
461 raise error.RevlogError(
460 _(b'revlog chunk cache size %r is not a power of 2')
462 _(b'revlog chunk cache size %r is not a power of 2')
461 % self._chunkcachesize
463 % self._chunkcachesize
462 )
464 )
463 force_nodemap = opts.get(b'devel-force-nodemap', False)
465 force_nodemap = opts.get(b'devel-force-nodemap', False)
464 return new_header, mmapindexthreshold, force_nodemap
466 return new_header, mmapindexthreshold, force_nodemap
465
467
466 def _get_data(self, filepath, mmap_threshold, size=None):
468 def _get_data(self, filepath, mmap_threshold, size=None):
467 """return a file content with or without mmap
469 """return a file content with or without mmap
468
470
469 If the file is missing return the empty string"""
471 If the file is missing return the empty string"""
470 try:
472 try:
471 with self.opener(filepath) as fp:
473 with self.opener(filepath) as fp:
472 if mmap_threshold is not None:
474 if mmap_threshold is not None:
473 file_size = self.opener.fstat(fp).st_size
475 file_size = self.opener.fstat(fp).st_size
474 if file_size >= mmap_threshold:
476 if file_size >= mmap_threshold:
475 if size is not None:
477 if size is not None:
476 # avoid potentiel mmap crash
478 # avoid potentiel mmap crash
477 size = min(file_size, size)
479 size = min(file_size, size)
478 # TODO: should .close() to release resources without
480 # TODO: should .close() to release resources without
479 # relying on Python GC
481 # relying on Python GC
480 if size is None:
482 if size is None:
481 return util.buffer(util.mmapread(fp))
483 return util.buffer(util.mmapread(fp))
482 else:
484 else:
483 return util.buffer(util.mmapread(fp, size))
485 return util.buffer(util.mmapread(fp, size))
484 if size is None:
486 if size is None:
485 return fp.read()
487 return fp.read()
486 else:
488 else:
487 return fp.read(size)
489 return fp.read(size)
488 except FileNotFoundError:
490 except FileNotFoundError:
489 return b''
491 return b''
490
492
491 def _loadindex(self, docket=None):
493 def _loadindex(self, docket=None):
492
494
493 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
495 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
494
496
495 if self.postfix is not None:
497 if self.postfix is not None:
496 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
498 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
497 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
499 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
498 entry_point = b'%s.i.a' % self.radix
500 entry_point = b'%s.i.a' % self.radix
499 else:
501 else:
500 entry_point = b'%s.i' % self.radix
502 entry_point = b'%s.i' % self.radix
501
503
502 if docket is not None:
504 if docket is not None:
503 self._docket = docket
505 self._docket = docket
504 self._docket_file = entry_point
506 self._docket_file = entry_point
505 else:
507 else:
506 entry_data = b''
508 entry_data = b''
507 self._initempty = True
509 self._initempty = True
508 entry_data = self._get_data(entry_point, mmapindexthreshold)
510 entry_data = self._get_data(entry_point, mmapindexthreshold)
509 if len(entry_data) > 0:
511 if len(entry_data) > 0:
510 header = INDEX_HEADER.unpack(entry_data[:4])[0]
512 header = INDEX_HEADER.unpack(entry_data[:4])[0]
511 self._initempty = False
513 self._initempty = False
512 else:
514 else:
513 header = new_header
515 header = new_header
514
516
515 self._format_flags = header & ~0xFFFF
517 self._format_flags = header & ~0xFFFF
516 self._format_version = header & 0xFFFF
518 self._format_version = header & 0xFFFF
517
519
518 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
520 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
519 if supported_flags is None:
521 if supported_flags is None:
520 msg = _(b'unknown version (%d) in revlog %s')
522 msg = _(b'unknown version (%d) in revlog %s')
521 msg %= (self._format_version, self.display_id)
523 msg %= (self._format_version, self.display_id)
522 raise error.RevlogError(msg)
524 raise error.RevlogError(msg)
523 elif self._format_flags & ~supported_flags:
525 elif self._format_flags & ~supported_flags:
524 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
526 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
525 display_flag = self._format_flags >> 16
527 display_flag = self._format_flags >> 16
526 msg %= (display_flag, self._format_version, self.display_id)
528 msg %= (display_flag, self._format_version, self.display_id)
527 raise error.RevlogError(msg)
529 raise error.RevlogError(msg)
528
530
529 features = FEATURES_BY_VERSION[self._format_version]
531 features = FEATURES_BY_VERSION[self._format_version]
530 self._inline = features[b'inline'](self._format_flags)
532 self._inline = features[b'inline'](self._format_flags)
531 self._generaldelta = features[b'generaldelta'](self._format_flags)
533 self._generaldelta = features[b'generaldelta'](self._format_flags)
532 self.hassidedata = features[b'sidedata']
534 self.hassidedata = features[b'sidedata']
533
535
534 if not features[b'docket']:
536 if not features[b'docket']:
535 self._indexfile = entry_point
537 self._indexfile = entry_point
536 index_data = entry_data
538 index_data = entry_data
537 else:
539 else:
538 self._docket_file = entry_point
540 self._docket_file = entry_point
539 if self._initempty:
541 if self._initempty:
540 self._docket = docketutil.default_docket(self, header)
542 self._docket = docketutil.default_docket(self, header)
541 else:
543 else:
542 self._docket = docketutil.parse_docket(
544 self._docket = docketutil.parse_docket(
543 self, entry_data, use_pending=self._trypending
545 self, entry_data, use_pending=self._trypending
544 )
546 )
545
547
546 if self._docket is not None:
548 if self._docket is not None:
547 self._indexfile = self._docket.index_filepath()
549 self._indexfile = self._docket.index_filepath()
548 index_data = b''
550 index_data = b''
549 index_size = self._docket.index_end
551 index_size = self._docket.index_end
550 if index_size > 0:
552 if index_size > 0:
551 index_data = self._get_data(
553 index_data = self._get_data(
552 self._indexfile, mmapindexthreshold, size=index_size
554 self._indexfile, mmapindexthreshold, size=index_size
553 )
555 )
554 if len(index_data) < index_size:
556 if len(index_data) < index_size:
555 msg = _(b'too few index data for %s: got %d, expected %d')
557 msg = _(b'too few index data for %s: got %d, expected %d')
556 msg %= (self.display_id, len(index_data), index_size)
558 msg %= (self.display_id, len(index_data), index_size)
557 raise error.RevlogError(msg)
559 raise error.RevlogError(msg)
558
560
559 self._inline = False
561 self._inline = False
560 # generaldelta implied by version 2 revlogs.
562 # generaldelta implied by version 2 revlogs.
561 self._generaldelta = True
563 self._generaldelta = True
562 # the logic for persistent nodemap will be dealt with within the
564 # the logic for persistent nodemap will be dealt with within the
563 # main docket, so disable it for now.
565 # main docket, so disable it for now.
564 self._nodemap_file = None
566 self._nodemap_file = None
565
567
566 if self._docket is not None:
568 if self._docket is not None:
567 self._datafile = self._docket.data_filepath()
569 self._datafile = self._docket.data_filepath()
568 self._sidedatafile = self._docket.sidedata_filepath()
570 self._sidedatafile = self._docket.sidedata_filepath()
569 elif self.postfix is None:
571 elif self.postfix is None:
570 self._datafile = b'%s.d' % self.radix
572 self._datafile = b'%s.d' % self.radix
571 else:
573 else:
572 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
574 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
573
575
574 self.nodeconstants = sha1nodeconstants
576 self.nodeconstants = sha1nodeconstants
575 self.nullid = self.nodeconstants.nullid
577 self.nullid = self.nodeconstants.nullid
576
578
577 # sparse-revlog can't be on without general-delta (issue6056)
579 # sparse-revlog can't be on without general-delta (issue6056)
578 if not self._generaldelta:
580 if not self._generaldelta:
579 self._sparserevlog = False
581 self._sparserevlog = False
580
582
581 self._storedeltachains = True
583 self._storedeltachains = True
582
584
583 devel_nodemap = (
585 devel_nodemap = (
584 self._nodemap_file
586 self._nodemap_file
585 and force_nodemap
587 and force_nodemap
586 and parse_index_v1_nodemap is not None
588 and parse_index_v1_nodemap is not None
587 )
589 )
588
590
589 use_rust_index = False
591 use_rust_index = False
590 if rustrevlog is not None:
592 if rustrevlog is not None:
591 if self._nodemap_file is not None:
593 if self._nodemap_file is not None:
592 use_rust_index = True
594 use_rust_index = True
593 else:
595 else:
594 use_rust_index = self.opener.options.get(b'rust.index')
596 use_rust_index = self.opener.options.get(b'rust.index')
595
597
596 self._parse_index = parse_index_v1
598 self._parse_index = parse_index_v1
597 if self._format_version == REVLOGV0:
599 if self._format_version == REVLOGV0:
598 self._parse_index = revlogv0.parse_index_v0
600 self._parse_index = revlogv0.parse_index_v0
599 elif self._format_version == REVLOGV2:
601 elif self._format_version == REVLOGV2:
600 self._parse_index = parse_index_v2
602 self._parse_index = parse_index_v2
601 elif self._format_version == CHANGELOGV2:
603 elif self._format_version == CHANGELOGV2:
602 self._parse_index = parse_index_cl_v2
604 self._parse_index = parse_index_cl_v2
603 elif devel_nodemap:
605 elif devel_nodemap:
604 self._parse_index = parse_index_v1_nodemap
606 self._parse_index = parse_index_v1_nodemap
605 elif use_rust_index:
607 elif use_rust_index:
606 self._parse_index = parse_index_v1_mixed
608 self._parse_index = parse_index_v1_mixed
607 try:
609 try:
608 d = self._parse_index(index_data, self._inline)
610 d = self._parse_index(index_data, self._inline)
609 index, chunkcache = d
611 index, chunkcache = d
610 use_nodemap = (
612 use_nodemap = (
611 not self._inline
613 not self._inline
612 and self._nodemap_file is not None
614 and self._nodemap_file is not None
613 and util.safehasattr(index, 'update_nodemap_data')
615 and util.safehasattr(index, 'update_nodemap_data')
614 )
616 )
615 if use_nodemap:
617 if use_nodemap:
616 nodemap_data = nodemaputil.persisted_data(self)
618 nodemap_data = nodemaputil.persisted_data(self)
617 if nodemap_data is not None:
619 if nodemap_data is not None:
618 docket = nodemap_data[0]
620 docket = nodemap_data[0]
619 if (
621 if (
620 len(d[0]) > docket.tip_rev
622 len(d[0]) > docket.tip_rev
621 and d[0][docket.tip_rev][7] == docket.tip_node
623 and d[0][docket.tip_rev][7] == docket.tip_node
622 ):
624 ):
623 # no changelog tampering
625 # no changelog tampering
624 self._nodemap_docket = docket
626 self._nodemap_docket = docket
625 index.update_nodemap_data(*nodemap_data)
627 index.update_nodemap_data(*nodemap_data)
626 except (ValueError, IndexError):
628 except (ValueError, IndexError):
627 raise error.RevlogError(
629 raise error.RevlogError(
628 _(b"index %s is corrupted") % self.display_id
630 _(b"index %s is corrupted") % self.display_id
629 )
631 )
630 self.index = index
632 self.index = index
631 self._segmentfile = randomaccessfile.randomaccessfile(
633 self._segmentfile = randomaccessfile.randomaccessfile(
632 self.opener,
634 self.opener,
633 (self._indexfile if self._inline else self._datafile),
635 (self._indexfile if self._inline else self._datafile),
634 self._chunkcachesize,
636 self._chunkcachesize,
635 chunkcache,
637 chunkcache,
636 )
638 )
637 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
639 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
638 self.opener,
640 self.opener,
639 self._sidedatafile,
641 self._sidedatafile,
640 self._chunkcachesize,
642 self._chunkcachesize,
641 )
643 )
642 # revnum -> (chain-length, sum-delta-length)
644 # revnum -> (chain-length, sum-delta-length)
643 self._chaininfocache = util.lrucachedict(500)
645 self._chaininfocache = util.lrucachedict(500)
644 # revlog header -> revlog compressor
646 # revlog header -> revlog compressor
645 self._decompressors = {}
647 self._decompressors = {}
646
648
647 @util.propertycache
649 @util.propertycache
648 def revlog_kind(self):
650 def revlog_kind(self):
649 return self.target[0]
651 return self.target[0]
650
652
651 @util.propertycache
653 @util.propertycache
652 def display_id(self):
654 def display_id(self):
653 """The public facing "ID" of the revlog that we use in message"""
655 """The public facing "ID" of the revlog that we use in message"""
654 # Maybe we should build a user facing representation of
656 # Maybe we should build a user facing representation of
655 # revlog.target instead of using `self.radix`
657 # revlog.target instead of using `self.radix`
656 return self.radix
658 return self.radix
657
659
658 def _get_decompressor(self, t):
660 def _get_decompressor(self, t):
659 try:
661 try:
660 compressor = self._decompressors[t]
662 compressor = self._decompressors[t]
661 except KeyError:
663 except KeyError:
662 try:
664 try:
663 engine = util.compengines.forrevlogheader(t)
665 engine = util.compengines.forrevlogheader(t)
664 compressor = engine.revlogcompressor(self._compengineopts)
666 compressor = engine.revlogcompressor(self._compengineopts)
665 self._decompressors[t] = compressor
667 self._decompressors[t] = compressor
666 except KeyError:
668 except KeyError:
667 raise error.RevlogError(
669 raise error.RevlogError(
668 _(b'unknown compression type %s') % binascii.hexlify(t)
670 _(b'unknown compression type %s') % binascii.hexlify(t)
669 )
671 )
670 return compressor
672 return compressor
671
673
672 @util.propertycache
674 @util.propertycache
673 def _compressor(self):
675 def _compressor(self):
674 engine = util.compengines[self._compengine]
676 engine = util.compengines[self._compengine]
675 return engine.revlogcompressor(self._compengineopts)
677 return engine.revlogcompressor(self._compengineopts)
676
678
677 @util.propertycache
679 @util.propertycache
678 def _decompressor(self):
680 def _decompressor(self):
679 """the default decompressor"""
681 """the default decompressor"""
680 if self._docket is None:
682 if self._docket is None:
681 return None
683 return None
682 t = self._docket.default_compression_header
684 t = self._docket.default_compression_header
683 c = self._get_decompressor(t)
685 c = self._get_decompressor(t)
684 return c.decompress
686 return c.decompress
685
687
686 def _indexfp(self):
688 def _indexfp(self):
687 """file object for the revlog's index file"""
689 """file object for the revlog's index file"""
688 return self.opener(self._indexfile, mode=b"r")
690 return self.opener(self._indexfile, mode=b"r")
689
691
690 def __index_write_fp(self):
692 def __index_write_fp(self):
691 # You should not use this directly and use `_writing` instead
693 # You should not use this directly and use `_writing` instead
692 try:
694 try:
693 f = self.opener(
695 f = self.opener(
694 self._indexfile, mode=b"r+", checkambig=self._checkambig
696 self._indexfile, mode=b"r+", checkambig=self._checkambig
695 )
697 )
696 if self._docket is None:
698 if self._docket is None:
697 f.seek(0, os.SEEK_END)
699 f.seek(0, os.SEEK_END)
698 else:
700 else:
699 f.seek(self._docket.index_end, os.SEEK_SET)
701 f.seek(self._docket.index_end, os.SEEK_SET)
700 return f
702 return f
701 except FileNotFoundError:
703 except FileNotFoundError:
702 return self.opener(
704 return self.opener(
703 self._indexfile, mode=b"w+", checkambig=self._checkambig
705 self._indexfile, mode=b"w+", checkambig=self._checkambig
704 )
706 )
705
707
706 def __index_new_fp(self):
708 def __index_new_fp(self):
707 # You should not use this unless you are upgrading from inline revlog
709 # You should not use this unless you are upgrading from inline revlog
708 return self.opener(
710 return self.opener(
709 self._indexfile,
711 self._indexfile,
710 mode=b"w",
712 mode=b"w",
711 checkambig=self._checkambig,
713 checkambig=self._checkambig,
712 atomictemp=True,
714 atomictemp=True,
713 )
715 )
714
716
715 def _datafp(self, mode=b'r'):
717 def _datafp(self, mode=b'r'):
716 """file object for the revlog's data file"""
718 """file object for the revlog's data file"""
717 return self.opener(self._datafile, mode=mode)
719 return self.opener(self._datafile, mode=mode)
718
720
719 @contextlib.contextmanager
721 @contextlib.contextmanager
720 def _sidedatareadfp(self):
722 def _sidedatareadfp(self):
721 """file object suitable to read sidedata"""
723 """file object suitable to read sidedata"""
722 if self._writinghandles:
724 if self._writinghandles:
723 yield self._writinghandles[2]
725 yield self._writinghandles[2]
724 else:
726 else:
725 with self.opener(self._sidedatafile) as fp:
727 with self.opener(self._sidedatafile) as fp:
726 yield fp
728 yield fp
727
729
728 def tiprev(self):
730 def tiprev(self):
729 return len(self.index) - 1
731 return len(self.index) - 1
730
732
731 def tip(self):
733 def tip(self):
732 return self.node(self.tiprev())
734 return self.node(self.tiprev())
733
735
734 def __contains__(self, rev):
736 def __contains__(self, rev):
735 return 0 <= rev < len(self)
737 return 0 <= rev < len(self)
736
738
737 def __len__(self):
739 def __len__(self):
738 return len(self.index)
740 return len(self.index)
739
741
740 def __iter__(self):
742 def __iter__(self):
741 return iter(range(len(self)))
743 return iter(range(len(self)))
742
744
743 def revs(self, start=0, stop=None):
745 def revs(self, start=0, stop=None):
744 """iterate over all rev in this revlog (from start to stop)"""
746 """iterate over all rev in this revlog (from start to stop)"""
745 return storageutil.iterrevs(len(self), start=start, stop=stop)
747 return storageutil.iterrevs(len(self), start=start, stop=stop)
746
748
747 def hasnode(self, node):
749 def hasnode(self, node):
748 try:
750 try:
749 self.rev(node)
751 self.rev(node)
750 return True
752 return True
751 except KeyError:
753 except KeyError:
752 return False
754 return False
753
755
754 def candelta(self, baserev, rev):
756 def candelta(self, baserev, rev):
755 """whether two revisions (baserev, rev) can be delta-ed or not"""
757 """whether two revisions (baserev, rev) can be delta-ed or not"""
756 # Disable delta if either rev requires a content-changing flag
758 # Disable delta if either rev requires a content-changing flag
757 # processor (ex. LFS). This is because such flag processor can alter
759 # processor (ex. LFS). This is because such flag processor can alter
758 # the rawtext content that the delta will be based on, and two clients
760 # the rawtext content that the delta will be based on, and two clients
759 # could have a same revlog node with different flags (i.e. different
761 # could have a same revlog node with different flags (i.e. different
760 # rawtext contents) and the delta could be incompatible.
762 # rawtext contents) and the delta could be incompatible.
761 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
763 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
762 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
764 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
763 ):
765 ):
764 return False
766 return False
765 return True
767 return True
766
768
767 def update_caches(self, transaction):
769 def update_caches(self, transaction):
768 if self._nodemap_file is not None:
770 if self._nodemap_file is not None:
769 if transaction is None:
771 if transaction is None:
770 nodemaputil.update_persistent_nodemap(self)
772 nodemaputil.update_persistent_nodemap(self)
771 else:
773 else:
772 nodemaputil.setup_persistent_nodemap(transaction, self)
774 nodemaputil.setup_persistent_nodemap(transaction, self)
773
775
774 def clearcaches(self):
776 def clearcaches(self):
775 self._revisioncache = None
777 self._revisioncache = None
776 self._chainbasecache.clear()
778 self._chainbasecache.clear()
777 self._segmentfile.clear_cache()
779 self._segmentfile.clear_cache()
778 self._segmentfile_sidedata.clear_cache()
780 self._segmentfile_sidedata.clear_cache()
779 self._pcache = {}
781 self._pcache = {}
780 self._nodemap_docket = None
782 self._nodemap_docket = None
781 self.index.clearcaches()
783 self.index.clearcaches()
782 # The python code is the one responsible for validating the docket, we
784 # The python code is the one responsible for validating the docket, we
783 # end up having to refresh it here.
785 # end up having to refresh it here.
784 use_nodemap = (
786 use_nodemap = (
785 not self._inline
787 not self._inline
786 and self._nodemap_file is not None
788 and self._nodemap_file is not None
787 and util.safehasattr(self.index, 'update_nodemap_data')
789 and util.safehasattr(self.index, 'update_nodemap_data')
788 )
790 )
789 if use_nodemap:
791 if use_nodemap:
790 nodemap_data = nodemaputil.persisted_data(self)
792 nodemap_data = nodemaputil.persisted_data(self)
791 if nodemap_data is not None:
793 if nodemap_data is not None:
792 self._nodemap_docket = nodemap_data[0]
794 self._nodemap_docket = nodemap_data[0]
793 self.index.update_nodemap_data(*nodemap_data)
795 self.index.update_nodemap_data(*nodemap_data)
794
796
795 def rev(self, node):
797 def rev(self, node):
796 try:
798 try:
797 return self.index.rev(node)
799 return self.index.rev(node)
798 except TypeError:
800 except TypeError:
799 raise
801 raise
800 except error.RevlogError:
802 except error.RevlogError:
801 # parsers.c radix tree lookup failed
803 # parsers.c radix tree lookup failed
802 if (
804 if (
803 node == self.nodeconstants.wdirid
805 node == self.nodeconstants.wdirid
804 or node in self.nodeconstants.wdirfilenodeids
806 or node in self.nodeconstants.wdirfilenodeids
805 ):
807 ):
806 raise error.WdirUnsupported
808 raise error.WdirUnsupported
807 raise error.LookupError(node, self.display_id, _(b'no node'))
809 raise error.LookupError(node, self.display_id, _(b'no node'))
808
810
809 # Accessors for index entries.
811 # Accessors for index entries.
810
812
811 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
813 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
812 # are flags.
814 # are flags.
813 def start(self, rev):
815 def start(self, rev):
814 return int(self.index[rev][0] >> 16)
816 return int(self.index[rev][0] >> 16)
815
817
816 def sidedata_cut_off(self, rev):
818 def sidedata_cut_off(self, rev):
817 sd_cut_off = self.index[rev][8]
819 sd_cut_off = self.index[rev][8]
818 if sd_cut_off != 0:
820 if sd_cut_off != 0:
819 return sd_cut_off
821 return sd_cut_off
820 # This is some annoying dance, because entries without sidedata
822 # This is some annoying dance, because entries without sidedata
821 # currently use 0 as their ofsset. (instead of previous-offset +
823 # currently use 0 as their ofsset. (instead of previous-offset +
822 # previous-size)
824 # previous-size)
823 #
825 #
824 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
826 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
825 # In the meantime, we need this.
827 # In the meantime, we need this.
826 while 0 <= rev:
828 while 0 <= rev:
827 e = self.index[rev]
829 e = self.index[rev]
828 if e[9] != 0:
830 if e[9] != 0:
829 return e[8] + e[9]
831 return e[8] + e[9]
830 rev -= 1
832 rev -= 1
831 return 0
833 return 0
832
834
833 def flags(self, rev):
835 def flags(self, rev):
834 return self.index[rev][0] & 0xFFFF
836 return self.index[rev][0] & 0xFFFF
835
837
836 def length(self, rev):
838 def length(self, rev):
837 return self.index[rev][1]
839 return self.index[rev][1]
838
840
839 def sidedata_length(self, rev):
841 def sidedata_length(self, rev):
840 if not self.hassidedata:
842 if not self.hassidedata:
841 return 0
843 return 0
842 return self.index[rev][9]
844 return self.index[rev][9]
843
845
844 def rawsize(self, rev):
846 def rawsize(self, rev):
845 """return the length of the uncompressed text for a given revision"""
847 """return the length of the uncompressed text for a given revision"""
846 l = self.index[rev][2]
848 l = self.index[rev][2]
847 if l >= 0:
849 if l >= 0:
848 return l
850 return l
849
851
850 t = self.rawdata(rev)
852 t = self.rawdata(rev)
851 return len(t)
853 return len(t)
852
854
853 def size(self, rev):
855 def size(self, rev):
854 """length of non-raw text (processed by a "read" flag processor)"""
856 """length of non-raw text (processed by a "read" flag processor)"""
855 # fast path: if no "read" flag processor could change the content,
857 # fast path: if no "read" flag processor could change the content,
856 # size is rawsize. note: ELLIPSIS is known to not change the content.
858 # size is rawsize. note: ELLIPSIS is known to not change the content.
857 flags = self.flags(rev)
859 flags = self.flags(rev)
858 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
860 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
859 return self.rawsize(rev)
861 return self.rawsize(rev)
860
862
861 return len(self.revision(rev))
863 return len(self.revision(rev))
862
864
863 def fast_rank(self, rev):
865 def fast_rank(self, rev):
864 """Return the rank of a revision if already known, or None otherwise.
866 """Return the rank of a revision if already known, or None otherwise.
865
867
866 The rank of a revision is the size of the sub-graph it defines as a
868 The rank of a revision is the size of the sub-graph it defines as a
867 head. Equivalently, the rank of a revision `r` is the size of the set
869 head. Equivalently, the rank of a revision `r` is the size of the set
868 `ancestors(r)`, `r` included.
870 `ancestors(r)`, `r` included.
869
871
870 This method returns the rank retrieved from the revlog in constant
872 This method returns the rank retrieved from the revlog in constant
871 time. It makes no attempt at computing unknown values for versions of
873 time. It makes no attempt at computing unknown values for versions of
872 the revlog which do not persist the rank.
874 the revlog which do not persist the rank.
873 """
875 """
874 rank = self.index[rev][ENTRY_RANK]
876 rank = self.index[rev][ENTRY_RANK]
875 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
877 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
876 return None
878 return None
877 if rev == nullrev:
879 if rev == nullrev:
878 return 0 # convention
880 return 0 # convention
879 return rank
881 return rank
880
882
881 def chainbase(self, rev):
883 def chainbase(self, rev):
882 base = self._chainbasecache.get(rev)
884 base = self._chainbasecache.get(rev)
883 if base is not None:
885 if base is not None:
884 return base
886 return base
885
887
886 index = self.index
888 index = self.index
887 iterrev = rev
889 iterrev = rev
888 base = index[iterrev][3]
890 base = index[iterrev][3]
889 while base != iterrev:
891 while base != iterrev:
890 iterrev = base
892 iterrev = base
891 base = index[iterrev][3]
893 base = index[iterrev][3]
892
894
893 self._chainbasecache[rev] = base
895 self._chainbasecache[rev] = base
894 return base
896 return base
895
897
896 def linkrev(self, rev):
898 def linkrev(self, rev):
897 return self.index[rev][4]
899 return self.index[rev][4]
898
900
899 def parentrevs(self, rev):
901 def parentrevs(self, rev):
900 try:
902 try:
901 entry = self.index[rev]
903 entry = self.index[rev]
902 except IndexError:
904 except IndexError:
903 if rev == wdirrev:
905 if rev == wdirrev:
904 raise error.WdirUnsupported
906 raise error.WdirUnsupported
905 raise
907 raise
906
908
907 if self.canonical_parent_order and entry[5] == nullrev:
909 if self.canonical_parent_order and entry[5] == nullrev:
908 return entry[6], entry[5]
910 return entry[6], entry[5]
909 else:
911 else:
910 return entry[5], entry[6]
912 return entry[5], entry[6]
911
913
912 # fast parentrevs(rev) where rev isn't filtered
914 # fast parentrevs(rev) where rev isn't filtered
913 _uncheckedparentrevs = parentrevs
915 _uncheckedparentrevs = parentrevs
914
916
915 def node(self, rev):
917 def node(self, rev):
916 try:
918 try:
917 return self.index[rev][7]
919 return self.index[rev][7]
918 except IndexError:
920 except IndexError:
919 if rev == wdirrev:
921 if rev == wdirrev:
920 raise error.WdirUnsupported
922 raise error.WdirUnsupported
921 raise
923 raise
922
924
923 # Derived from index values.
925 # Derived from index values.
924
926
925 def end(self, rev):
927 def end(self, rev):
926 return self.start(rev) + self.length(rev)
928 return self.start(rev) + self.length(rev)
927
929
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

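    # Illustrative sketch (editor's note, not part of the original module):
    # reconstructing a revision means applying every delta in the chain on
    # top of the base text, so the chain length directly bounds read cost.
    # `rl` is a hypothetical revlog instance:
    #
    #     chain, stopped = rl._deltachain(rev)
    #     assert chain[-1] == rev and not stopped
    #     # with a stop point, the chain is truncated and `stopped` is set
    #     partial, stopped = rl._deltachain(rev, stoprev=chain[0])
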
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

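    # Illustrative sketch (editor's note, not part of the original module):
    # the returned lazyancestors object is both iterable and usable for
    # membership tests without walking the whole graph up front. `rl` is a
    # hypothetical revlog instance:
    #
    #     lazy = rl.ancestors([tiprev], inclusive=True)
    #     if somerev in lazy:           # incremental membership test
    #         first = next(iter(lazy))  # highest ancestor rev comes first
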
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

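    # Illustrative sketch (editor's note, not part of the original module):
    # the two return values correspond to the revsets `::common` and
    # `(::heads) - (::common)`; the second is what a pull would need to
    # transfer. `rl`, `common` and `heads` are hypothetical:
    #
    #     has, missing = rl.findcommonmissing(common, heads)
    #     assert all(rl.rev(n) in has for n in common)
    #     assert not any(rl.rev(n) in has for n in missing)
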
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

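    # Illustrative sketch (editor's note, not part of the original module):
    # findmissing() is the node-level sibling of findmissingrevs(); both
    # delegate to the incremental missing-ancestors machinery. `rl`, `n1`
    # and `n2` are hypothetical:
    #
    #     missing_nodes = rl.findmissing(common=[n1], heads=[n2])
    #     missing_revs = rl.findmissingrevs(
    #         common=[rl.rev(n1)], heads=[rl.rev(n2)]
    #     )
    #     assert missing_nodes == [rl.node(r) for r in missing_revs]
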
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

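    # Illustrative sketch (editor's note, not part of the original module):
    # 'nodes' comes back in increasing revision order, which is also a
    # topological order, so replaying 'nodes' never applies a child before
    # its parents. `rl`, `roots` and `heads` are hypothetical:
    #
    #     nodes, outroots, outheads = rl.nodesbetween(roots, heads)
    #     revs = [rl.rev(n) for n in nodes]
    #     assert revs == sorted(revs)
    #     assert set(outroots) <= set(roots) and set(outheads) <= set(heads)
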
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

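    # Illustrative sketch (editor's note, not part of the original module):
    # with no arguments, heads() reports every node without children; with
    # `start`, only heads descending from `start` are kept. `rl` and
    # `somenode` are hypothetical:
    #
    #     for h in rl.heads():
    #         assert not rl.children(h)   # a head has no children
    #     sub = rl.heads(start=somenode)  # heads descended from somenode
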
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

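    # Illustrative sketch (editor's note, not part of the original module):
    # isancestorrev() above reduces to a reachableroots() query: revision
    # `a` reaches `b` exactly when `a` survives as a root of `a::b`. `rl`
    # is a hypothetical revlog instance:
    #
    #     def is_ancestor(rl, a, b):
    #         if a == nullrev or a == b:
    #             return True
    #         if a > b:
    #             return False  # revision numbers grow child-ward
    #         return bool(rl.reachableroots(a, [b], [a], includepath=False))
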
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

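    # Illustrative sketch (editor's note, not part of the original module):
    # lookup() tries the exact matches of _match() first (rev number,
    # binary node, full hex) and only then the more expensive prefix
    # search of _partialmatch(). All identifiers are bytes; `rl` is a
    # hypothetical revlog instance:
    #
    #     node = rl.lookup(b'0')           # revision number as a string
    #     node = rl.lookup(rl.node(0))     # binary node, returned as-is
    #     node = rl.lookup(hex(node)[:6])  # unambiguous hex prefix
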
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

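    # Illustrative sketch (editor's note, not part of the original module):
    # the prefix returned by shortest() should round-trip through the
    # prefix lookup and never be shorter than `minlength`. `rl` is a
    # hypothetical revlog instance:
    #
    #     prefix = rl.shortest(node, minlength=4)
    #     assert len(prefix) >= 4
    #     assert rl._partialmatch(prefix) == node
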
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

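    # Illustrative sketch (editor's note, not part of the original module):
    # the returned offset lets a caller slice one revision's compressed
    # chunk out of the shared segment, which is exactly what _chunks()
    # below does. `rl`, `firstrev`, `lastrev` and `rev` are hypothetical:
    #
    #     offset, data = rl._getsegmentforrevs(firstrev, lastrev)
    #     chunkstart = rl.start(rev)
    #     if rl._inline:
    #         chunkstart += (rev + 1) * rl.index.entry_size
    #     begin = chunkstart - offset
    #     chunk = data[begin : begin + rl.length(rev)]
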
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

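    # Illustrative sketch (editor's note, not part of the original module):
    # batching through _chunks() reads one contiguous segment instead of
    # one read per revision, but the results must match the one-at-a-time
    # path. `rl`, `base` and `rev` are hypothetical:
    #
    #     revs = list(rl.revs(start=base, stop=rev + 1))
    #     assert [bytes(c) for c in rl._chunks(revs)] == [
    #         bytes(rl._chunk(r)) for r in revs
    #     ]
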
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

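    # Illustrative sketch (editor's note, not part of the original module):
    # whichever branch revdiff() takes, applying the returned delta to the
    # raw text of rev1 must rebuild the raw text of rev2. `rl` is a
    # hypothetical revlog instance:
    #
    #     delta = rl.revdiff(rev1, rev2)
    #     assert mdiff.patch(rl.rawdata(rev1), delta) == rl.rawdata(rev2)
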
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

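    # The read pipeline above in short, assuming an opened revlog `rl`:
    #
    #   rawtext = rl.rawdata(rev)   # stored bytes; flag processors not applied
    #   text = rl.revision(rev)     # flag processors applied, hash verified
    #
    # When no extra flags are set on the revision, text == rawtext and the
    # cached value is returned directly.
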
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

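    # The reconstruction above is a fold of binary deltas over a base text.
    # A self-contained sketch of that idea using the real `mdiff` API and
    # made-up contents:
    #
    #   base = b'line 1\nline 2\n'
    #   new = b'line 1\nline 2 changed\n'
    #   bins = [mdiff.textdiff(base, new)]   # a delta chain of length one
    #   assert mdiff.patches(base, bins) == new
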
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

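    # The three compression modes dispatched above, for reference (the names
    # are the constants imported at the top of this file):
    #
    #   COMP_MODE_PLAIN   -> stored verbatim, no decompression needed
    #   COMP_MODE_DEFAULT -> use the revlog-wide default decompressor
    #   COMP_MODE_INLINE  -> the chunk carries its own header byte and is
    #                        routed through decompress() defined below
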
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

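    # The SHA-1 node scheme that hashrevisionsha1() implements, sketched with
    # hashlib for illustration (assumed to match storageutil's behavior: the
    # two parent nodes are hashed in sorted order, then the text):
    #
    #   import hashlib
    #   def node_sha1(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
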
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = None
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if (
                        trindex is None
                        and troffset
                        <= self.start(r) + r * self.index.entry_size
                    ):
                        trindex = r
                new_dfh.flush()

            if trindex is None:
                trindex = 0

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid referencing potentially
                # unflushed data content.
                if ifh is not None:
                    ifh.close()

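    # Typical use of the context manager above, sketched with a hypothetical
    # transaction `tr` (this is essentially what addrawrevision() and
    # addgroup() below do):
    #
    #   with rl._writing(tr):
    #       rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)
    #
    # All three handles (index, data, sidedata) stay open for the duration
    # and are closed, index last, when the block exits.
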
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

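    # The (header, data) pair returned above is simply concatenated on disk,
    # so the first byte of a stored chunk tells readers how to decode it.
    # Round-trip sketch, assuming `rl` is a revlog instance:
    #
    #   h, d = rl.compress(b'some revision text')
    #   assert bytes(rl.decompress(h + d)) == b'some revision text'
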
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

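    # A standalone illustration of the b'x' fast path: zlib streams (with the
    # default settings) begin with 0x78, ASCII 'x', which is why the header
    # byte doubles as the zlib magic and no extra framing is needed
    # (real `zlib` API, made-up payload):
    #
    #   import zlib
    #   chunk = zlib.compress(b'chunk body')
    #   assert chunk[0:1] == b'x'
    #   assert zlib.decompress(chunk) == b'chunk body'
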
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._format_version == CHANGELOGV2:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

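    # The CHANGELOGV2 rank computed above counts the revision itself plus all
    # of its ancestors. A worked example on a tiny hypothetical graph:
    #
    #   rev 0 (root):              rank 1
    #   rev 1 (parent 0):          rank 2
    #   rev 2 (parent 0):          rank 2
    #   rev 3 (merge of 1 and 2):  rank 4
    #                              = 1 + rank(2) + |ancestors(1) - ancestors(2)|
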
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

2730 def iscensored(self, rev):
2737 def iscensored(self, rev):
2731 """Check if a file revision is censored."""
2738 """Check if a file revision is censored."""
2732 if not self._censorable:
2739 if not self._censorable:
2733 return False
2740 return False
2734
2741
2735 return self.flags(rev) & REVIDX_ISCENSORED
2742 return self.flags(rev) & REVIDX_ISCENSORED
2736
2743
2737 def _peek_iscensored(self, baserev, delta):
2744 def _peek_iscensored(self, baserev, delta):
2738 """Quickly check if a delta produces a censored revision."""
2745 """Quickly check if a delta produces a censored revision."""
2739 if not self._censorable:
2746 if not self._censorable:
2740 return False
2747 return False
2741
2748
2742 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2749 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2743
2750
2744 def getstrippoint(self, minlink):
2751 def getstrippoint(self, minlink):
2745 """find the minimum rev that must be stripped to strip the linkrev
2752 """find the minimum rev that must be stripped to strip the linkrev
2746
2753
2747 Returns a tuple containing the minimum rev and a set of all revs that
2754 Returns a tuple containing the minimum rev and a set of all revs that
2748 have linkrevs that will be broken by this strip.
2755 have linkrevs that will be broken by this strip.
2749 """
2756 """
2750 return storageutil.resolvestripinfo(
2757 return storageutil.resolvestripinfo(
2751 minlink,
2758 minlink,
2752 len(self) - 1,
2759 len(self) - 1,
2753 self.headrevs(),
2760 self.headrevs(),
2754 self.linkrev,
2761 self.linkrev,
2755 self.parentrevs,
2762 self.parentrevs,
2756 )
2763 )
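
    # Illustrative example (hypothetical values): in a revlog with
    # revisions 0..9 whose linkrevs equal their revs, getstrippoint(7)
    # would return (7, set()): revision 7 is the truncation point and
    # no revision below it has a linkrev >= 7.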

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)
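            # for inline revlogs, each index entry is immediately followed
            # by its data chunk in the same file, so the truncation point
            # is the data offset plus the size of the index entries seen
            # so far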

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping; however, it
            # is not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)
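
    # Illustrative example (hypothetical numbers): a non-inline revlog
    # whose data file carries 16 bytes of trailing garbage while the
    # index is intact would make checksize() return (16, 0).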

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

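        # Without stored delta chains, any delta we emit would have to be
        # recomputed from full texts, so fall back to full revisions unless
        # the consumer explicitly asked for deltas against the previous
        # revision.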
        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When left unset, the destination revlog's existing
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
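            # Index entry layout: entry[0] packs the data offset and flags,
            # entry[4] is the linkrev, entry[5] and entry[6] are the parent
            # revisions, and entry[7] is the node.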
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
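                    # the helpers return the new sidedata along with a
                    # (flags to add, flags to remove) pair, merged into
                    # the entry flags below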
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
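
    # Illustrative example (hypothetical values): calling
    # storageinfo(revisionscount=True, storedsize=True) on a revlog with
    # 42 revisions could return something like
    # {b'revisionscount': 42, b'storedsize': 12345}.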

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

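                # Compression mode semantics: COMP_MODE_PLAIN stores the
                # chunk uncompressed, COMP_MODE_DEFAULT relies on the
                # docket's default compression engine, and COMP_MODE_INLINE
                # keeps the compression header inline with the chunk itself.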
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
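                # entry[8] and entry[9] hold the existing sidedata offset
                # and length; nonzero values mean this revision already
                # has sidedata on disk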
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)