revlog: allow to pass an existing docket to `_loadindex()`...

marmoute - r48194:f7f082bc default
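The hunk below adds a `docket` keyword argument to `revlog._loadindex()`, so a caller that already holds a parsed docket can hand it over instead of forcing the method to re-read and re-parse the entry point from disk. What follows is a minimal sketch of the call pattern this enables, not code from this changeset: the helper name `reload_with_docket`, the revlog instance `rl`, and the origin of `pending_docket` are all assumptions for illustration.

    def reload_with_docket(rl, pending_docket=None):
        """Reload rl's index, reusing an already-parsed docket when we have one.

        Hypothetical helper: `rl` is an existing `revlog` instance and
        `pending_docket` is a docket object the caller parsed earlier
        (for example while looking at a pending transaction).
        """
        if pending_docket is not None:
            # The docket already knows the index/data/sidedata file paths
            # and sizes, so no entry point needs to be parsed again.
            rl._loadindex(docket=pending_docket)
        else:
            # Fall back to the previous behavior: read and parse the
            # entry point from disk.
            rl._loadindex()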
@@ -1,3388 +1,3394 @@
 # revlog.py - storage back-end for mercurial
 # coding: utf8
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import absolute_import
 
 import binascii
 import collections
 import contextlib
 import errno
 import io
 import os
 import struct
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullrev,
     sha1nodeconstants,
     short,
     wdirrev,
 )
 from .i18n import _
 from .pycompat import getattr
 from .revlogutils.constants import (
     ALL_KINDS,
     CHANGELOGV2,
     COMP_MODE_DEFAULT,
     COMP_MODE_INLINE,
     COMP_MODE_PLAIN,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
     KIND_CHANGELOG,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
     SUPPORTED_FLAGS,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import attr
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     revlogutils,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     censor,
     deltas as deltautil,
     docket as docketutil,
     flagutil,
     nodemap as nodemaputil,
     revlogv0,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )
 
 # blanked usage of all the names to prevent pyflakes constraints
 # We need these names available in the module for extensions.
 
 REVLOGV0
 REVLOGV1
 REVLOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS
 
 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 rustrevlog = policy.importrust('revlog')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False
 
 
 def ellipsiswriteprocessor(rl, text):
     return text, False
 
 
 def ellipsisrawprocessor(rl, text):
     return False
 
 
 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )
 
 
 def _verify_revision(rl, skipflags, state, node):
     """Verify the integrity of the given revlog ``node`` while providing a hook
     point for extensions to influence the operation."""
     if skipflags:
         state[b'skipread'].add(node)
     else:
         # Side-effect: read content and verify hash.
         rl.revision(node)
 
 
 # True if a fast implementation for persistent-nodemap is available
 #
 # We also consider we have a "fast" implementation in "pure" python because
 # people using pure don't really have performance considerations (and a
 # wheelbarrow of other slowness sources)
 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
     parsers, 'BaseIndexObject'
 )
 
 
 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta(object):
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
     protocol_flags = attr.ib()
     linknode = attr.ib(default=None)
 
 
 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)
 
 
 def parse_index_v1(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline)
     return index, cache
 
 
 def parse_index_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, revlogv2=True)
     return index, cache
 
 
 def parse_index_cl_v2(data, inline):
     # call the C implementation to parse the index data
     assert not inline
     from .pure.parsers import parse_index_cl_v2
 
     index, cache = parse_index_cl_v2(data)
     return index, cache
 
 
 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
 
     def parse_index_v1_nodemap(data, inline):
         index, cache = parsers.parse_index_devel_nodemap(data, inline)
         return index, cache
 
 
 else:
     parse_index_v1_nodemap = None
 
 
 def parse_index_v1_mixed(data, inline):
     index, cache = parse_index_v1(data, inline)
     return rustrevlog.MixedIndex(index), cache
 
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF
 
 PARTIAL_READ_MSG = _(
     b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
 )
 
 FILE_TOO_SHORT_MSG = _(
     b'cannot read from revlog %s;'
     b' expected %d bytes from offset %d, data size is %d'
 )
 
 
 class revlog(object):
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
 
     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.
 
     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
 
     If censorable is True, the revlog can have censored revisions.
 
     If `upperboundcomp` is not None, this is the expected maximal gain from
     compression for the data content.
 
     `concurrencychecker` is an optional function that receives 3 arguments: a
     file handle, a filename, and an expected position. It should check whether
     the current position in the file handle is valid, and log/warn/fail (by
     raising).
 
     See mercurial/revlogutils/constants.py for details about the content of an
     index entry.
     """
 
     _flagserrorclass = error.RevlogError
 
     def __init__(
         self,
         opener,
         target,
         radix,
         postfix=None,  # only exists for `tmpcensored` now
         checkambig=False,
         mmaplargeindex=False,
         censorable=False,
         upperboundcomp=None,
         persistentnodemap=False,
         concurrencychecker=None,
         trypending=False,
     ):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
 
         `target`: a (KIND, ID) tuple that identifies the content stored in
         this revlog. It helps the rest of the code to understand what the
         revlog is about without having to resort to heuristics and index
         filename analysis. Note: this must reliably be set by normal code,
         but test, debug, or performance measurement code might not set it
         to an accurate value.
         """
         self.upperboundcomp = upperboundcomp
 
         self.radix = radix
 
         self._docket_file = None
         self._indexfile = None
         self._datafile = None
         self._sidedatafile = None
         self._nodemap_file = None
         self.postfix = postfix
         self._trypending = trypending
         self.opener = opener
         if persistentnodemap:
             self._nodemap_file = nodemaputil.get_nodemap_file(self)
 
         assert target[0] in ALL_KINDS
         assert len(target) == 2
         self.target = target
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         self._mmaplargeindex = mmaplargeindex
         self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._revisioncache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, b'')
         # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
         self.index = None
         self._docket = None
         self._nodemap_docket = None
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
         self._compengine = b'zlib'
         self._compengineopts = {}
         self._maxdeltachainspan = -1
         self._withsparseread = False
         self._sparserevlog = False
         self.hassidedata = False
         self._srdensitythreshold = 0.50
         self._srmingapsize = 262144
 
         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)
 
         # 3-tuple of file handles being used for active writing.
         self._writinghandles = None
         # prevent nesting of addgroup
         self._adding_group = None
 
         self._loadindex()
 
         self._concurrencychecker = concurrencychecker
 
     def _init_opts(self):
         """process options (from above/config) to set up associated default revlog mode
 
         These values might be affected when actually reading on-disk information.
 
         The relevant values are returned for use in _loadindex().
 
         * newversionflags:
             version header to use if we need to create a new revlog
 
         * mmapindexthreshold:
             minimal index size at which to start using mmap
 
         * force_nodemap:
             force the usage of a "development" version of the nodemap code
         """
         mmapindexthreshold = None
         opts = self.opener.options
 
         if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
             new_header = CHANGELOGV2
         elif b'revlogv2' in opts:
             new_header = REVLOGV2
         elif b'revlogv1' in opts:
             new_header = REVLOGV1 | FLAG_INLINE_DATA
             if b'generaldelta' in opts:
                 new_header |= FLAG_GENERALDELTA
         elif b'revlogv0' in self.opener.options:
             new_header = REVLOGV0
         else:
             new_header = REVLOG_DEFAULT_VERSION
 
         if b'chunkcachesize' in opts:
             self._chunkcachesize = opts[b'chunkcachesize']
         if b'maxchainlen' in opts:
             self._maxchainlen = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
             self._deltabothparents = opts[b'deltabothparents']
         self._lazydelta = bool(opts.get(b'lazydelta', True))
         self._lazydeltabase = False
         if self._lazydelta:
             self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
         if b'compengine' in opts:
             self._compengine = opts[b'compengine']
         if b'zlib.level' in opts:
             self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
         if b'zstd.level' in opts:
             self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
         if b'maxdeltachainspan' in opts:
             self._maxdeltachainspan = opts[b'maxdeltachainspan']
         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
             mmapindexthreshold = opts[b'mmapindexthreshold']
         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
         withsparseread = bool(opts.get(b'with-sparse-read', False))
         # sparse-revlog forces sparse-read
         self._withsparseread = self._sparserevlog or withsparseread
         if b'sparse-read-density-threshold' in opts:
             self._srdensitythreshold = opts[b'sparse-read-density-threshold']
         if b'sparse-read-min-gap-size' in opts:
             self._srmingapsize = opts[b'sparse-read-min-gap-size']
         if opts.get(b'enableellipsis'):
             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
 
         # revlog v0 doesn't have flag processors
         for flag, processor in pycompat.iteritems(
             opts.get(b'flagprocessors', {})
         ):
             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
 
         if self._chunkcachesize <= 0:
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not greater than 0')
                 % self._chunkcachesize
             )
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not a power of 2')
                 % self._chunkcachesize
             )
         force_nodemap = opts.get(b'devel-force-nodemap', False)
         return new_header, mmapindexthreshold, force_nodemap
 
     def _get_data(self, filepath, mmap_threshold, size=None):
         """return a file content with or without mmap
 
         If the file is missing return the empty string"""
         try:
             with self.opener(filepath) as fp:
                 if mmap_threshold is not None:
                     file_size = self.opener.fstat(fp).st_size
                     if file_size >= mmap_threshold:
                         if size is not None:
                             # avoid potential mmap crash
                             size = min(file_size, size)
                         # TODO: should .close() to release resources without
                         # relying on Python GC
                         if size is None:
                             return util.buffer(util.mmapread(fp))
                         else:
                             return util.buffer(util.mmapread(fp, size))
                 if size is None:
                     return fp.read()
                 else:
                     return fp.read(size)
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
             return b''
 
-    def _loadindex(self):
+    def _loadindex(self, docket=None):
 
         new_header, mmapindexthreshold, force_nodemap = self._init_opts()
 
         if self.postfix is not None:
             entry_point = b'%s.i.%s' % (self.radix, self.postfix)
         elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
             entry_point = b'%s.i.a' % self.radix
         else:
             entry_point = b'%s.i' % self.radix
 
-        entry_data = b''
-        self._initempty = True
-        entry_data = self._get_data(entry_point, mmapindexthreshold)
-        if len(entry_data) > 0:
-            header = INDEX_HEADER.unpack(entry_data[:4])[0]
-            self._initempty = False
-        else:
-            header = new_header
-
-        self._format_flags = header & ~0xFFFF
-        self._format_version = header & 0xFFFF
-
-        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
-        if supported_flags is None:
-            msg = _(b'unknown version (%d) in revlog %s')
-            msg %= (self._format_version, self.display_id)
-            raise error.RevlogError(msg)
-        elif self._format_flags & ~supported_flags:
-            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
-            display_flag = self._format_flags >> 16
-            msg %= (display_flag, self._format_version, self.display_id)
-            raise error.RevlogError(msg)
-
-        features = FEATURES_BY_VERSION[self._format_version]
-        self._inline = features[b'inline'](self._format_flags)
-        self._generaldelta = features[b'generaldelta'](self._format_flags)
-        self.hassidedata = features[b'sidedata']
-
-        if not features[b'docket']:
-            self._indexfile = entry_point
-            index_data = entry_data
-        else:
-            self._docket_file = entry_point
-            if self._initempty:
-                self._docket = docketutil.default_docket(self, header)
-            else:
-                self._docket = docketutil.parse_docket(
-                    self, entry_data, use_pending=self._trypending
-                )
+        if docket is not None:
+            self._docket = docket
+            self._docket_file = entry_point
+        else:
+            entry_data = b''
+            self._initempty = True
+            entry_data = self._get_data(entry_point, mmapindexthreshold)
+            if len(entry_data) > 0:
+                header = INDEX_HEADER.unpack(entry_data[:4])[0]
+                self._initempty = False
+            else:
+                header = new_header
+
+            self._format_flags = header & ~0xFFFF
+            self._format_version = header & 0xFFFF
+
+            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
+            if supported_flags is None:
+                msg = _(b'unknown version (%d) in revlog %s')
+                msg %= (self._format_version, self.display_id)
+                raise error.RevlogError(msg)
+            elif self._format_flags & ~supported_flags:
+                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
+                display_flag = self._format_flags >> 16
+                msg %= (display_flag, self._format_version, self.display_id)
+                raise error.RevlogError(msg)
+
+            features = FEATURES_BY_VERSION[self._format_version]
+            self._inline = features[b'inline'](self._format_flags)
+            self._generaldelta = features[b'generaldelta'](self._format_flags)
+            self.hassidedata = features[b'sidedata']
+
+            if not features[b'docket']:
+                self._indexfile = entry_point
+                index_data = entry_data
+            else:
+                self._docket_file = entry_point
+                if self._initempty:
+                    self._docket = docketutil.default_docket(self, header)
+                else:
+                    self._docket = docketutil.parse_docket(
+                        self, entry_data, use_pending=self._trypending
+                    )
+
+        if self._docket is not None:
             self._indexfile = self._docket.index_filepath()
             index_data = b''
             index_size = self._docket.index_end
             if index_size > 0:
                 index_data = self._get_data(
                     self._indexfile, mmapindexthreshold, size=index_size
                 )
                 if len(index_data) < index_size:
                     msg = _(b'too few index data for %s: got %d, expected %d')
                     msg %= (self.display_id, len(index_data), index_size)
                     raise error.RevlogError(msg)
 
             self._inline = False
             # generaldelta implied by version 2 revlogs.
             self._generaldelta = True
             # the logic for persistent nodemap will be dealt with within the
             # main docket, so disable it for now.
             self._nodemap_file = None
 
         if self._docket is not None:
             self._datafile = self._docket.data_filepath()
             self._sidedatafile = self._docket.sidedata_filepath()
         elif self.postfix is None:
             self._datafile = b'%s.d' % self.radix
         else:
             self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
 
         self.nodeconstants = sha1nodeconstants
         self.nullid = self.nodeconstants.nullid
 
         # sparse-revlog can't be on without general-delta (issue6056)
         if not self._generaldelta:
             self._sparserevlog = False
 
         self._storedeltachains = True
 
         devel_nodemap = (
             self._nodemap_file
             and force_nodemap
             and parse_index_v1_nodemap is not None
         )
 
         use_rust_index = False
         if rustrevlog is not None:
             if self._nodemap_file is not None:
                 use_rust_index = True
             else:
                 use_rust_index = self.opener.options.get(b'rust.index')
 
         self._parse_index = parse_index_v1
         if self._format_version == REVLOGV0:
             self._parse_index = revlogv0.parse_index_v0
         elif self._format_version == REVLOGV2:
             self._parse_index = parse_index_v2
         elif self._format_version == CHANGELOGV2:
             self._parse_index = parse_index_cl_v2
         elif devel_nodemap:
             self._parse_index = parse_index_v1_nodemap
         elif use_rust_index:
             self._parse_index = parse_index_v1_mixed
         try:
             d = self._parse_index(index_data, self._inline)
             index, _chunkcache = d
             use_nodemap = (
                 not self._inline
                 and self._nodemap_file is not None
                 and util.safehasattr(index, 'update_nodemap_data')
             )
             if use_nodemap:
                 nodemap_data = nodemaputil.persisted_data(self)
                 if nodemap_data is not None:
                     docket = nodemap_data[0]
                     if (
                         len(d[0]) > docket.tip_rev
                         and d[0][docket.tip_rev][7] == docket.tip_node
                     ):
                         # no changelog tampering
                         self._nodemap_docket = docket
                         index.update_nodemap_data(*nodemap_data)
         except (ValueError, IndexError):
             raise error.RevlogError(
                 _(b"index %s is corrupted") % self.display_id
             )
         self.index, self._chunkcache = d
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = util.lrucachedict(500)
         # revlog header -> revlog compressor
         self._decompressors = {}
 
     @util.propertycache
     def revlog_kind(self):
         return self.target[0]
 
     @util.propertycache
     def display_id(self):
         """The public facing "ID" of the revlog that we use in messages"""
         # Maybe we should build a user facing representation of
         # revlog.target instead of using `self.radix`
         return self.radix
 
     def _get_decompressor(self, t):
         try:
             compressor = self._decompressors[t]
         except KeyError:
             try:
                 engine = util.compengines.forrevlogheader(t)
                 compressor = engine.revlogcompressor(self._compengineopts)
                 self._decompressors[t] = compressor
             except KeyError:
                 raise error.RevlogError(
                     _(b'unknown compression type %s') % binascii.hexlify(t)
                 )
         return compressor
 
     @util.propertycache
     def _compressor(self):
         engine = util.compengines[self._compengine]
         return engine.revlogcompressor(self._compengineopts)
 
     @util.propertycache
     def _decompressor(self):
         """the default decompressor"""
         if self._docket is None:
             return None
         t = self._docket.default_compression_header
         c = self._get_decompressor(t)
         return c.decompress
 
     def _indexfp(self):
         """file object for the revlog's index file"""
         return self.opener(self._indexfile, mode=b"r")
 
     def __index_write_fp(self):
         # You should not use this directly; use `_writing` instead
         try:
             f = self.opener(
                 self._indexfile, mode=b"r+", checkambig=self._checkambig
             )
             if self._docket is None:
                 f.seek(0, os.SEEK_END)
             else:
                 f.seek(self._docket.index_end, os.SEEK_SET)
             return f
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
             return self.opener(
                 self._indexfile, mode=b"w+", checkambig=self._checkambig
             )
 
     def __index_new_fp(self):
         # You should not use this unless you are upgrading from inline revlog
         return self.opener(
             self._indexfile,
             mode=b"w",
             checkambig=self._checkambig,
             atomictemp=True,
         )
 
     def _datafp(self, mode=b'r'):
         """file object for the revlog's data file"""
         return self.opener(self._datafile, mode=mode)
 
     @contextlib.contextmanager
     def _datareadfp(self, existingfp=None):
         """file object suitable to read data"""
         # Use explicit file handle, if given.
         if existingfp is not None:
             yield existingfp
 
         # Use a file handle being actively used for writes, if available.
         # There is some danger to doing this because reads will seek the
         # file. However, _writeentry() performs a SEEK_END before all writes,
         # so we should be safe.
         elif self._writinghandles:
             if self._inline:
                 yield self._writinghandles[0]
             else:
                 yield self._writinghandles[1]
 
         # Otherwise open a new file handle.
         else:
             if self._inline:
                 func = self._indexfp
             else:
                 func = self._datafp
             with func() as fp:
                 yield fp
 
     @contextlib.contextmanager
     def _sidedatareadfp(self):
         """file object suitable to read sidedata"""
         if self._writinghandles:
             yield self._writinghandles[2]
         else:
             with self.opener(self._sidedatafile) as fp:
                 yield fp
 
     def tiprev(self):
         return len(self.index) - 1
 
     def tip(self):
         return self.node(self.tiprev())
 
     def __contains__(self, rev):
         return 0 <= rev < len(self)
 
     def __len__(self):
         return len(self.index)
 
     def __iter__(self):
         return iter(pycompat.xrange(len(self)))
 
     def revs(self, start=0, stop=None):
         """iterate over all revs in this revlog (from start to stop)"""
         return storageutil.iterrevs(len(self), start=start, stop=stop)
 
     @property
     def nodemap(self):
         msg = (
             b"revlog.nodemap is deprecated, "
             b"use revlog.index.[has_node|rev|get_rev]"
         )
         util.nouideprecwarn(msg, b'5.3', stacklevel=2)
         return self.index.nodemap
 
     @property
     def _nodecache(self):
         msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
         util.nouideprecwarn(msg, b'5.3', stacklevel=2)
         return self.index.nodemap
 
     def hasnode(self, node):
         try:
             self.rev(node)
             return True
         except KeyError:
             return False
 
     def candelta(self, baserev, rev):
         """whether two revisions (baserev, rev) can be delta-ed or not"""
         # Disable delta if either rev requires a content-changing flag
         # processor (ex. LFS). This is because such a flag processor can alter
         # the rawtext content that the delta will be based on, and two clients
         # could have the same revlog node with different flags (i.e. different
         # rawtext contents) and the delta could be incompatible.
         if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
             self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
         ):
             return False
         return True
 
     def update_caches(self, transaction):
         if self._nodemap_file is not None:
             if transaction is None:
                 nodemaputil.update_persistent_nodemap(self)
             else:
                 nodemaputil.setup_persistent_nodemap(transaction, self)
 
     def clearcaches(self):
         self._revisioncache = None
         self._chainbasecache.clear()
         self._chunkcache = (0, b'')
         self._pcache = {}
         self._nodemap_docket = None
         self.index.clearcaches()
         # The python code is the one responsible for validating the docket,
         # so we end up having to refresh it here.
         use_nodemap = (
             not self._inline
             and self._nodemap_file is not None
             and util.safehasattr(self.index, 'update_nodemap_data')
         )
         if use_nodemap:
             nodemap_data = nodemaputil.persisted_data(self)
             if nodemap_data is not None:
                 self._nodemap_docket = nodemap_data[0]
                 self.index.update_nodemap_data(*nodemap_data)
 
     def rev(self, node):
         try:
             return self.index.rev(node)
         except TypeError:
             raise
         except error.RevlogError:
             # parsers.c radix tree lookup failed
             if (
                 node == self.nodeconstants.wdirid
                 or node in self.nodeconstants.wdirfilenodeids
             ):
                 raise error.WdirUnsupported
             raise error.LookupError(node, self.display_id, _(b'no node'))
 
     # Accessors for index entries.
 
     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
     # are flags.
     def start(self, rev):
         return int(self.index[rev][0] >> 16)
 
     def sidedata_cut_off(self, rev):
         sd_cut_off = self.index[rev][8]
         if sd_cut_off != 0:
             return sd_cut_off
         # This is some annoying dance, because entries without sidedata
         # currently use 0 as their offset. (instead of previous-offset +
         # previous-size)
         #
         # We should reconsider this sidedata → 0 sidedata_offset policy.
         # In the meantime, we need this.
         while 0 <= rev:
             e = self.index[rev]
             if e[9] != 0:
                 return e[8] + e[9]
             rev -= 1
         return 0
858
864
859 def flags(self, rev):
865 def flags(self, rev):
860 return self.index[rev][0] & 0xFFFF
866 return self.index[rev][0] & 0xFFFF
861
867
862 def length(self, rev):
868 def length(self, rev):
863 return self.index[rev][1]
869 return self.index[rev][1]
864
870
865 def sidedata_length(self, rev):
871 def sidedata_length(self, rev):
866 if not self.hassidedata:
872 if not self.hassidedata:
867 return 0
873 return 0
868 return self.index[rev][9]
874 return self.index[rev][9]
869
875
870 def rawsize(self, rev):
876 def rawsize(self, rev):
871 """return the length of the uncompressed text for a given revision"""
877 """return the length of the uncompressed text for a given revision"""
872 l = self.index[rev][2]
878 l = self.index[rev][2]
873 if l >= 0:
879 if l >= 0:
874 return l
880 return l
875
881
876 t = self.rawdata(rev)
882 t = self.rawdata(rev)
877 return len(t)
883 return len(t)
878
884
879 def size(self, rev):
885 def size(self, rev):
880 """length of non-raw text (processed by a "read" flag processor)"""
886 """length of non-raw text (processed by a "read" flag processor)"""
881 # fast path: if no "read" flag processor could change the content,
887 # fast path: if no "read" flag processor could change the content,
882 # size is rawsize. note: ELLIPSIS is known to not change the content.
888 # size is rawsize. note: ELLIPSIS is known to not change the content.
883 flags = self.flags(rev)
889 flags = self.flags(rev)
884 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
890 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
885 return self.rawsize(rev)
891 return self.rawsize(rev)
886
892
887 return len(self.revision(rev, raw=False))
893 return len(self.revision(rev, raw=False))
888
894
889 def chainbase(self, rev):
895 def chainbase(self, rev):
890 base = self._chainbasecache.get(rev)
896 base = self._chainbasecache.get(rev)
891 if base is not None:
897 if base is not None:
892 return base
898 return base
893
899
894 index = self.index
900 index = self.index
895 iterrev = rev
901 iterrev = rev
896 base = index[iterrev][3]
902 base = index[iterrev][3]
897 while base != iterrev:
903 while base != iterrev:
898 iterrev = base
904 iterrev = base
899 base = index[iterrev][3]
905 base = index[iterrev][3]
900
906
901 self._chainbasecache[rev] = base
907 self._chainbasecache[rev] = base
902 return base
908 return base
903
909
904 def linkrev(self, rev):
910 def linkrev(self, rev):
905 return self.index[rev][4]
911 return self.index[rev][4]
906
912
907 def parentrevs(self, rev):
913 def parentrevs(self, rev):
908 try:
914 try:
909 entry = self.index[rev]
915 entry = self.index[rev]
910 except IndexError:
916 except IndexError:
911 if rev == wdirrev:
917 if rev == wdirrev:
912 raise error.WdirUnsupported
918 raise error.WdirUnsupported
913 raise
919 raise
914 if entry[5] == nullrev:
920 if entry[5] == nullrev:
915 return entry[6], entry[5]
921 return entry[6], entry[5]
916 else:
922 else:
917 return entry[5], entry[6]
923 return entry[5], entry[6]
918
924
919 # fast parentrevs(rev) where rev isn't filtered
925 # fast parentrevs(rev) where rev isn't filtered
920 _uncheckedparentrevs = parentrevs
926 _uncheckedparentrevs = parentrevs
921
927
922 def node(self, rev):
928 def node(self, rev):
923 try:
929 try:
924 return self.index[rev][7]
930 return self.index[rev][7]
925 except IndexError:
931 except IndexError:
926 if rev == wdirrev:
932 if rev == wdirrev:
927 raise error.WdirUnsupported
933 raise error.WdirUnsupported
928 raise
934 raise
929
935
930 # Derived from index values.
936 # Derived from index values.
931
937
932 def end(self, rev):
938 def end(self, rev):
933 return self.start(rev) + self.length(rev)
939 return self.start(rev) + self.length(rev)
934
940
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # (d[5] is a revision number, so it must be compared against
        # nullrev, not the binary nullid, for the swap to ever trigger)
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

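    # Illustration of the two chain layouts walked above (a sketch with
    # made-up revision numbers, not data from a real revlog). With
    # generaldelta, each entry names its delta parent explicitly, so the
    # chain can skip revisions; without it, every delta applies to the
    # previous revision, so the chain is always contiguous:
    #
    #   generaldelta:     _deltachain(7) -> ([2, 5, 7], False)
    #                     (7 deltas against 5, 5 against 2, 2 is a base)
    #   no generaldelta:  _deltachain(7) -> ([5, 6, 7], False)
    #                     (assuming rev 5 stores a full text)
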
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

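    # Worked example (made-up revisions) for findcommonmissing: in a linear
    # history 0 <- 1 <- 2, calling it with common=[node(1)] and
    # heads=[node(2)] yields has = {nullrev, 0, 1} (the inclusive ancestors
    # of common) and a missing list of [node(2)], i.e. (::heads) - (::common).
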
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

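    # Worked example (made-up revisions): in a linear history 0 <- 1 <- 2,
    # every rev is first marked as a potential head, then each rev clears
    # its parents' marks, leaving ishead = [0, 0, 1, 0] and a result of [2].
    # The extra slot in ishead absorbs writes for nullrev parents (index -1).
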
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

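    # The early `a > b` exit above is safe because a revlog is append-only
    # and parents always precede their children, so an ancestor's revision
    # number can never be larger than a descendant's.
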
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

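    # Sketch of the append condition above (made-up numbers): with a cached
    # window at offset 0 holding 16 bytes, a new segment at offset 16 is
    # contiguous and simply extends the cache; a segment at offset 64 is
    # not, so it replaces the cache instead.
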
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                filename = self._indexfile if self._inline else self._datafile
                got = len(d) - startoffset
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)
            return util.buffer(d, startoffset, length)

        if len(d) < length:
            filename = self._indexfile if self._inline else self._datafile
            # here offset == realoffset, so the data read starts exactly at
            # the requested offset (the old `len(d) - startoffset` would hit
            # an undefined name on this branch)
            got = len(d)
            m = PARTIAL_READ_MSG % (filename, length, offset, got)
            raise error.RevlogError(m)

        return d

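    # Illustration of the window alignment above (made-up numbers): with
    # cachesize = 65536 (a power of two), `offset & ~(cachesize - 1)` rounds
    # down to a cache boundary. For offset=70000, length=100:
    #
    #   realoffset = 70000 & ~65535 = 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536
    #
    # so a single aligned 64 KiB window covering bytes 65536..131072 is read
    # and cached, and the requested 100 bytes are sliced out of it.
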
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

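    # In an inline revlog the index entries and data chunks are interleaved
    # in a single file, so the data for revision r sits after r + 1 index
    # entries (entries 0..r); hence the `(rev + 1) * entry_size` adjustment
    # above. Sketch with made-up sizes (entry_size = 64, rev 0 data 10 bytes):
    #
    #   [entry 0][data 0][entry 1][data 1]...
    #   data 0 starts at 0 + 1 * 64 = 64; data 1 at 10 + 2 * 64 = 138.
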
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

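    # `_chunks` above reads one contiguous on-disk segment per slice and
    # carves individual chunks out of it with zero-copy buffers. When
    # sparse-read is enabled, `deltautil.slicechunk` splits a request such
    # as [0, 1, 7, 8] into dense slices (e.g. [0, 1] and [7, 8]) so large
    # gaps are not read from disk; this is a sketch of the intent, the
    # exact slicing depends on the configured density targets.
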
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

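    # With general delta, index slot 3 stores the delta base revision
    # directly, so a delta may point at any earlier revision. Without it,
    # deltas always chain linearly: e.g. if rev 4's entry has base != 4,
    # `deltaparent(4)` is rev 3 in a non-generaldelta revlog, but could be
    # rev 1 in a generaldelta one.
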
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to the instance to cache both the
            # capability test and the attribute lookup on later calls
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

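    # Sketch of the snapshot relationship used above: a sparse revlog may
    # store a revision as a delta against an intermediate snapshot, which
    # itself deltas against an earlier full snapshot, e.g.
    #
    #   full snapshot (depth 0) <- intermediate snapshot (depth 1) <- rev 8
    #
    # `snapshotdepth(rev)` counts the snapshots along that chain.
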
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

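    # `revdiff` has a fast path: when rev2 is already stored as a delta
    # against rev1, the stored chunk *is* the requested delta and can be
    # returned as-is; otherwise both rawtexts are reconstructed and diffed
    # with `mdiff.textdiff`, which is considerably more expensive.
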
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future to make
        the code more efficient/lazy.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

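    # The raw/processed split above is what flag processors hinge on: for a
    # revision with, say, REVIDX_EXTSTORED set, ``rawdata()`` returns the
    # stored bytes verbatim while ``revision()`` runs the registered read
    # transform to recover the logical text. The example flag is only
    # illustrative; any flag in REVIDX_FLAGS_ORDER behaves the same way.
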
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

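    # Reconstruction sketch for `_rawtext`: with a delta chain such as
    # [2, 5, 9] for rev 9, chunk 2 is the base text and chunks 5 and 9 are
    # binary patches, so roughly
    #
    #   rawtext = mdiff.patches(chunk(2), [chunk(5), chunk(9)])
    #
    # When the cached revision sits inside the chain, `_deltachain` stops
    # there and the cached rawtext replaces the base, skipping earlier reads.
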
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        # XXX this needs caching, as we do for data
        with self._sidedatareadfp() as sdf:
            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                filename = self._sidedatafile
                end = self._docket.sidedata_end
                offset = sidedata_offset
                length = sidedata_size
                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                raise error.RevlogError(m)

            sdf.seek(sidedata_offset, os.SEEK_SET)
            comp_segment = sdf.read(sidedata_size)

            if len(comp_segment) < sidedata_size:
                filename = self._sidedatafile
                length = sidedata_size
                offset = sidedata_offset
                got = len(comp_segment)
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

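    # For reference, `storageutil.hashrevisionsha1` computes the SHA-1 of
    # the two parent nodes (in sorted order) followed by the text, i.e.
    # roughly:
    #
    #   sha1(min(p1, p2) + max(p1, p2) + text)
    #
    # which is why the same text with different parents yields a different
    # node.
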
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

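    # Conversion sketch: an inline revlog keeps index entries and data
    # interleaved in the ``.i`` file; once the combined size crosses
    # ``_maxinline``, the loop above streams every revision's data segment
    # into a new ``.d`` file and rewrites the ``.i`` with index entries
    # only. ``trindex`` tracks the revision containing the in-flight
    # transaction offset, so a rollback truncates both files consistently.
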
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

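    # Usage sketch for the `_writing` context manager (``rl`` and ``tr``
    # stand for an open revlog and an active transaction; names are
    # illustrative):
    #
    #   with rl._writing(tr):
    #       rl.addrevision(text, tr, linkrev, p1, p2)
    #
    # Nested entries are cheap: if handles are already open the manager
    # just yields, so helpers can wrap their writes unconditionally.
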
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

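    # `compress` returns a (header, data) pair so callers can concatenate
    # them when writing. A rough round-trip sketch, assuming ``rl`` is an
    # open revlog using the default zlib engine:
    #
    #   h, comp = rl.compress(b'some revision text')
    #   assert rl.decompress(h + comp) == b'some revision text'
    #
    # ``h`` is b'u' for data stored uncompressed, and empty when the
    # compressor's own header (e.g. b'x' for zlib) already tags the data.
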
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines.
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different from ones we manually add.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

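    # The entry built above is what the read paths index into: slot 10
    # (data compression mode) drives `_chunk`/`_chunks`, and slot 11
    # (sidedata compression mode) drives `_sidedata`. COMP_MODE_INLINE
    # means "sniff the chunk header", which is the only option for revlogs
    # without a docket; docket-backed revlogs can record the mode up front
    # and skip the per-chunk header entirely.
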
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2
        needs a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )
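                        # Illustrative note: mdiff.replacediffheader(oldlen,
                        # newlen) is struct.pack(b">lll", 0, oldlen, newlen),
                        # so the only acceptable delta here is a single hunk
                        # replacing bytes [0, oldlen) of the censored base
                        # with newlen bytes of new content.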

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as
                    # raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. When left unset, the destination revlog's current
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

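    # Illustrative note on storageinfo() (hypothetical caller): only the
    # requested keys are populated in the returned dict, e.g.
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # -> {b'revisionscount': ..., b'storedsize': ...}
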
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            # position the sidedata handle at the recorded end of sidedata
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)