revlog: fix a bug where transaction can be aborted partially...
Arseniy Alekseyev
r49423:ccd9cb73 stable
@@ -1,3303 +1,3310 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exist for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
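        # For illustration: the power-of-2 check above uses the usual bit
        # trick -- for n > 0, ``n & (n - 1) == 0`` iff n is a power of two.
        # E.g. 65536 (the default) passes, while 0 or 3 would raise.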
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
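
    # Usage note (illustrative): ``_get_data(b'foo.i', 1 << 20)`` would
    # return the raw bytes of ``foo.i``, or an mmap-backed buffer once the
    # file reaches one megabyte; a missing file simply yields ``b''``.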

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF
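            # For illustration: the 32-bit header packs the flags in the
            # high 16 bits and the version in the low 16 bits. A v1 revlog
            # created with inline data and generaldelta would carry
            # REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA == 0x00030001,
            # splitting into _format_flags == 0x00030000 and
            # _format_version == 0x0001.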

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
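
    # For illustration: index[rev][0] packs the data offset and the flags
    # into a single integer, with the offset in the high bits and 16 flag
    # bits in the low bits. With a (hypothetical) offset of 4096 and the
    # censored flag (REVIDX_ISCENSORED) set, the entry would hold
    # (4096 << 16) | 0x8000; start() recovers 4096, flags() recovers 0x8000.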
847
847
848 def length(self, rev):
848 def length(self, rev):
849 return self.index[rev][1]
849 return self.index[rev][1]
850
850
851 def sidedata_length(self, rev):
851 def sidedata_length(self, rev):
852 if not self.hassidedata:
852 if not self.hassidedata:
853 return 0
853 return 0
854 return self.index[rev][9]
854 return self.index[rev][9]
855
855
856 def rawsize(self, rev):
856 def rawsize(self, rev):
857 """return the length of the uncompressed text for a given revision"""
857 """return the length of the uncompressed text for a given revision"""
858 l = self.index[rev][2]
858 l = self.index[rev][2]
859 if l >= 0:
859 if l >= 0:
860 return l
860 return l
861
861
862 t = self.rawdata(rev)
862 t = self.rawdata(rev)
863 return len(t)
863 return len(t)
864
864
865 def size(self, rev):
865 def size(self, rev):
866 """length of non-raw text (processed by a "read" flag processor)"""
866 """length of non-raw text (processed by a "read" flag processor)"""
867 # fast path: if no "read" flag processor could change the content,
867 # fast path: if no "read" flag processor could change the content,
868 # size is rawsize. note: ELLIPSIS is known to not change the content.
868 # size is rawsize. note: ELLIPSIS is known to not change the content.
869 flags = self.flags(rev)
869 flags = self.flags(rev)
870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
870 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
871 return self.rawsize(rev)
871 return self.rawsize(rev)
872
872
873 return len(self.revision(rev, raw=False))
873 return len(self.revision(rev, raw=False))
874
874
875 def chainbase(self, rev):
875 def chainbase(self, rev):
876 base = self._chainbasecache.get(rev)
876 base = self._chainbasecache.get(rev)
877 if base is not None:
877 if base is not None:
878 return base
878 return base
879
879
880 index = self.index
880 index = self.index
881 iterrev = rev
881 iterrev = rev
882 base = index[iterrev][3]
882 base = index[iterrev][3]
883 while base != iterrev:
883 while base != iterrev:
884 iterrev = base
884 iterrev = base
885 base = index[iterrev][3]
885 base = index[iterrev][3]
886
886
887 self._chainbasecache[rev] = base
887 self._chainbasecache[rev] = base
888 return base
888 return base
889
889
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

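    # Sketch of what ``_chaininfo`` accumulates, assuming a hypothetical
    # general-delta chain 2 -> 5 -> 9 (rev 2 is the base, each later rev
    # deltas against the previous one listed):
    #
    #   chainlen(9)   == 2    # two deltas sit on top of rev 2
    #   _chaininfo(9) == (2, e9[1] + e5[1] + e2[1])
    #
    # where ``eN[1]`` is the compressed size stored in the index entry of
    # rev N; the base's own size is counted because it too must be
    # decompressed before the deltas can be applied.
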
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

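    # Example calls on the hypothetical chain above (rev 2 is a snapshot,
    # revs 5 and 9 are deltas):
    #
    #   _deltachain(9)            => ([2, 5, 9], False)
    #   _deltachain(9, stoprev=5) => ([9], True)
    #
    # The chain comes back base-first because that is the order in which
    # the deltas must be applied to rebuild the revision.
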
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

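    # The ``lazyset`` above exists so that the (potentially huge) ancestor
    # set of ``common`` is only materialized as far as the walk actually
    # probes it. A sketch of the overall contract, for a hypothetical
    # revlog ``rl`` with nodes ``a`` (shared history) and ``b`` (a head
    # only known locally):
    #
    #   has, missing = rl.findcommonmissing(common=[a], heads=[b])
    #   # ``has`` lazily answers "is r an ancestor of a?"
    #   # ``missing`` is ::b - ::a, as nodes, sorted by revision number
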
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

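    # The three methods above are thin wrappers around one primitive:
    # ``incrementalmissingrevs`` returns the stateful object, while
    # ``findmissingrevs`` and ``findmissing`` run a single query in rev
    # and node space respectively. Equivalence, sketched for hypothetical
    # inputs ``c`` and ``h`` (lists of revision numbers):
    #
    #   rl.findmissingrevs(common=c, heads=h)
    #       == rl.incrementalmissingrevs(common=c).missingancestors(h)
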
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

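    # A compact example of ``nodesbetween`` on a hypothetical linear
    # history 0 -> 1 -> 2 -> 3 (nodes n0..n3):
    #
    #   nodesbetween(roots=[n1], heads=[n3])
    #       => ([n1, n2, n3], [n1], [n3])
    #
    # i.e. the topologically sorted span, the roots that proved reachable,
    # and the heads that proved reachable, in that order.
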
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

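    # ``_headrevs`` works by elimination: every unfiltered revision starts
    # out as a candidate head and is struck out as soon as it is seen as
    # somebody's parent; the extra trailing slot absorbs nullrev parents.
    # Traced on a hypothetical two-headed graph 0 -> 1 and 0 -> 2:
    #
    #   ishead after the loop: [0, 1, 1, 0]  =>  _headrevs() == [1, 2]
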
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

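    # The ``reachableroots`` call in ``isancestorrev`` asks: starting from
    # head ``b``, restricted to revs >= a, is the root ``a`` reachable?
    # With ``includepath=False`` only the reachable roots themselves come
    # back, so a non-empty result means "a is an ancestor of b". On a
    # hypothetical linear history, reachableroots(2, [5], [2]) would be
    # non-empty and isancestorrev(2, 5) therefore True.
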
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

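    # ``_match`` tries the cheap, exact interpretations of ``id`` in order:
    #
    #   1. an int is taken as a revision number directly;
    #   2. a string of nodelen bytes is tried as a binary node;
    #   3. a decimal string (possibly negative) is tried as str(rev);
    #   4. a string of 2 * nodelen hex digits is tried as a full hex node.
    #
    # Anything matching none of these falls through and returns None,
    # leaving prefix resolution to ``_partialmatch`` below.
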
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

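    # Resolution of an abbreviated hex id, sketched for a hypothetical
    # prefix b'1f3a':
    #
    #   - the C radix tree (``index.partialmatch``) answers in the common
    #     case, with ``hasnode`` filtering hidden revisions;
    #   - b'ff...'-shaped prefixes stay ambiguous with the virtual working
    #     directory node and raise WdirUnsupported when only wdir matches;
    #   - the pure-Python fallback scans ``index`` for nodes starting with
    #     the binary form of the (even-length) prefix.
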
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

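    # The offset arithmetic above, made concrete for a hypothetical inline
    # revlog (e.g. entry_size == 64): revision data for rev r is
    # interleaved after its index entry, so the raw file offset of rev r's
    # chunk is
    #
    #   (r + 1) * entry_size + start(r)
    #
    # For separate .i/.d files the ``start``/``end`` values are byte
    # offsets into the data file and are used unchanged.
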
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

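    # Dispatch on the per-revision compression mode (index field 10),
    # shared by ``_chunk`` and ``_chunks``:
    #
    #   COMP_MODE_PLAIN   -> stored verbatim, returned as-is
    #   COMP_MODE_INLINE  -> header byte embedded in the chunk itself,
    #                        handled by ``self.decompress``
    #   COMP_MODE_DEFAULT -> no per-chunk header; the revlog-wide
    #                        ``self._decompressor`` is applied
    #
    # Any other value indicates corruption or an unknown format and
    # raises RevlogError.
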
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

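    # Snapshot recursion in ``issnapshot``, traced for a hypothetical
    # sparse revlog where rev 7 deltas against rev 4 (not one of its
    # parents) and rev 4 deltas against nullrev:
    #
    #   issnapshot(4) -> base is nullrev       -> True (level-0 snapshot)
    #   issnapshot(7) -> base 4 is not p1/p2   -> issnapshot(4) -> True
    #   snapshotdepth(7) == len(_deltachain(7)[0]) - 1 == 1
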
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

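    # ``revdiff`` fast path: when rev2 was actually stored as a delta
    # against rev1, the stored chunk already *is* the requested diff and
    # can be returned without reconstructing either revision, e.g. for a
    # hypothetical general-delta log where deltaparent(4) == 3:
    #
    #   revdiff(3, 4) == bytes(self._chunk(4))
    #
    # Otherwise both raw texts are rebuilt and diffed with mdiff.textdiff.
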
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

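    # Illustrative usage sketch (not part of the original file; `rl` is a
    # hypothetical revlog instance). `revision()` runs flag processors, so
    # it yields the logical text; `rawdata()` returns the stored bytes:
    #
    #     text = rl.revision(node)  # processed revision text
    #     raw = rl.rawdata(node)    # raw stored form, e.g. for changegroups
    #
    # The two differ only when revision flags (censored, extstored, ...) are
    # set on the revision.
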
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

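    # Conceptually, _rawtext rebuilds a revision by folding the stored
    # deltas over a base text. A minimal sketch of that reconstruction,
    # assuming `base` and `deltas` are the decompressed chunks of a chain
    # (illustrative only; the real code batches this via mdiff.patches):
    #
    #     text = base
    #     for delta in deltas:
    #         text = mdiff.patch(text, delta)
    #
    # Stopping the chain walk at `cachedrev` lets a cached intermediate
    # text stand in for the base, so only the tail deltas are applied.
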
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

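    # The three sidedata compression modes, summarized (a sketch matching
    # the constants dispatched on above):
    #
    #     COMP_MODE_PLAIN    stored uncompressed, bytes used as-is
    #     COMP_MODE_DEFAULT  compressed with the docket's default engine,
    #                        so the per-chunk header byte is omitted
    #     COMP_MODE_INLINE   self-describing chunk; its first byte selects
    #                        the decompressor (see decompress() below)
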
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

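    # A sketch of the default node hash convention, assuming the standard
    # storageutil.hashrevisionsha1 behavior: the parents are ordered so the
    # hash is symmetric in p1/p2, then hashed together with the text:
    #
    #     import hashlib
    #     def hashrevisionsha1(text, p1, p2):
    #         s = hashlib.sha1(min(p1, p2))
    #         s.update(max(p1, p2))
    #         s.update(text)
    #         return s.digest()
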
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = None
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if (
                        trindex is None
                        and troffset
                        <= self.start(r) + r * self.index.entry_size
                    ):
                        trindex = r
                new_dfh.flush()

            if trindex is None:
                trindex = 0

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

                tr.replace(self._indexfile, trindex * self.index.entry_size)
                nodemaputil.setup_persistent_nodemap(tr, self)
                self._segmentfile = randomaccessfile.randomaccessfile(
                    self.opener,
                    self._datafile,
                    self._chunkcachesize,
                )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

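    # Note on the trindex logic above (a reading of this commit's fix, not
    # original text): `trindex` must be the *first* revision whose inline
    # position is at or past the transaction's recorded index offset.
    # Guarding with `trindex is None` stops the scan at the first match;
    # without it every later revision would overwrite `trindex`,
    # tr.replace() would record a data offset that is too large, and
    # aborting the transaction would truncate too little of the new data
    # file, leaving the transaction only partially aborted.
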
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references
                # to potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

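    # Usage sketch (illustrative): all mutation goes through this context
    # manager so the three handles share one transaction scope:
    #
    #     with self._writing(transaction):
    #         self._addrevision(node, rawtext, transaction, link, p1, p2,
    #                           flags, cachedelta)
    #
    # Nesting is cheap: if handles are already open, the manager simply
    # yields without reopening or re-registering files.
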
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

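    # Illustrative call (hypothetical names): adding a revision returns its
    # rev number, and re-adding an existing node is a no-op that returns
    # the existing rev:
    #
    #     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #     assert rl.addrevision(text, tr, linkrev, p1node, p2node) == rev
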
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

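    # The header byte convention, summarized (a sketch based on the two
    # methods above):
    #
    #     b'u'   uncompressed chunk; payload follows the marker byte
    #     b'\0'  stored raw because the data already starts with NUL
    #     b'x'   zlib stream (zlib's own first byte doubles as the header)
    #     other  first byte names a pluggable compression engine,
    #            resolved via _get_decompressor()
    #
    # compress() returning (b'', data) means the payload is self-describing
    # and no extra marker byte needs to be written.
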
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

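    # Layout reminder (a sketch of the inline case): index entries and data
    # chunks are interleaved in the .i file, so the entry for rev r lives at
    #
    #     self.start(r) + r * self.index.entry_size
    #
    # with its data chunk immediately after, one entry_size further. This is
    # the same arithmetic _enforceinlinesize() uses to translate transaction
    # offsets back to a revision.
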
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

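    # Shape of each item in `deltas` (as unpacked above), e.g. as produced
    # by changegroup application:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # Duplicate nodes are tolerated and reported through
    # duplicaterevisioncb; the return value (`not empty`) tells the caller
    # whether the group contained at least one revision, new or duplicate.
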
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

2719 def getstrippoint(self, minlink):
2726 def getstrippoint(self, minlink):
2720 """find the minimum rev that must be stripped to strip the linkrev
2727 """find the minimum rev that must be stripped to strip the linkrev
2721
2728
2722 Returns a tuple containing the minimum rev and a set of all revs that
2729 Returns a tuple containing the minimum rev and a set of all revs that
2723 have linkrevs that will be broken by this strip.
2730 have linkrevs that will be broken by this strip.
2724 """
2731 """
2725 return storageutil.resolvestripinfo(
2732 return storageutil.resolvestripinfo(
2726 minlink,
2733 minlink,
2727 len(self) - 1,
2734 len(self) - 1,
2728 self.headrevs(),
2735 self.headrevs(),
2729 self.linkrev,
2736 self.linkrev,
2730 self.parentrevs,
2737 self.parentrevs,
2731 )
2738 )
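# Illustrative sketch (not part of this change) of the intended calling
# pattern; `rl` and `tr` stand for a hypothetical revlog and an active
# transaction:
#
#   striprev, brokenrevs = rl.getstrippoint(minlink)
#   # `brokenrevs` holds the revisions whose linkrevs the strip will
#   # break; strip() then truncates from `striprev` onwards and trusts
#   # the caller to re-add whatever should survive.
#   rl.strip(minlink, tr)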
2732
2739
2733 def strip(self, minlink, transaction):
2740 def strip(self, minlink, transaction):
2734 """truncate the revlog on the first revision with a linkrev >= minlink
2741 """truncate the revlog on the first revision with a linkrev >= minlink
2735
2742
2736 This function is called when we're stripping revision minlink and
2743 This function is called when we're stripping revision minlink and
2737 its descendants from the repository.
2744 its descendants from the repository.
2738
2745
2739 We have to remove all revisions with linkrev >= minlink, because
2746 We have to remove all revisions with linkrev >= minlink, because
2740 the equivalent changelog revisions will be renumbered after the
2747 the equivalent changelog revisions will be renumbered after the
2741 strip.
2748 strip.
2742
2749
2743 So we truncate the revlog on the first of these revisions, and
2750 So we truncate the revlog on the first of these revisions, and
2744 trust that the caller has saved the revisions that shouldn't be
2751 trust that the caller has saved the revisions that shouldn't be
2745 removed and that it'll re-add them after this truncation.
2752 removed and that it'll re-add them after this truncation.
2746 """
2753 """
2747 if len(self) == 0:
2754 if len(self) == 0:
2748 return
2755 return
2749
2756
2750 rev, _ = self.getstrippoint(minlink)
2757 rev, _ = self.getstrippoint(minlink)
2751 if rev == len(self):
2758 if rev == len(self):
2752 return
2759 return
2753
2760
2754 # first truncate the files on disk
2761 # first truncate the files on disk
2755 data_end = self.start(rev)
2762 data_end = self.start(rev)
2756 if not self._inline:
2763 if not self._inline:
2757 transaction.add(self._datafile, data_end)
2764 transaction.add(self._datafile, data_end)
2758 end = rev * self.index.entry_size
2765 end = rev * self.index.entry_size
2759 else:
2766 else:
2760 end = data_end + (rev * self.index.entry_size)
2767 end = data_end + (rev * self.index.entry_size)
2761
2768
2762 if self._sidedatafile:
2769 if self._sidedatafile:
2763 sidedata_end = self.sidedata_cut_off(rev)
2770 sidedata_end = self.sidedata_cut_off(rev)
2764 transaction.add(self._sidedatafile, sidedata_end)
2771 transaction.add(self._sidedatafile, sidedata_end)
2765
2772
2766 transaction.add(self._indexfile, end)
2773 transaction.add(self._indexfile, end)
2767 if self._docket is not None:
2774 if self._docket is not None:
2768 # XXX we could leverage the docket while stripping. However it is
2775 # XXX we could leverage the docket while stripping. However it is
2769 # not powerful enough at the time of this comment
2776 # not powerful enough at the time of this comment
2770 self._docket.index_end = end
2777 self._docket.index_end = end
2771 self._docket.data_end = data_end
2778 self._docket.data_end = data_end
2772 self._docket.sidedata_end = sidedata_end
2779 self._docket.sidedata_end = sidedata_end
2773 self._docket.write(transaction, stripping=True)
2780 self._docket.write(transaction, stripping=True)
2774
2781
2775 # then reset internal state in memory to forget those revisions
2782 # then reset internal state in memory to forget those revisions
2776 self._revisioncache = None
2783 self._revisioncache = None
2777 self._chaininfocache = util.lrucachedict(500)
2784 self._chaininfocache = util.lrucachedict(500)
2778 self._segmentfile.clear_cache()
2785 self._segmentfile.clear_cache()
2779 self._segmentfile_sidedata.clear_cache()
2786 self._segmentfile_sidedata.clear_cache()
2780
2787
2781 del self.index[rev:-1]
2788 del self.index[rev:-1]
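# Worked example of the truncation offsets above (illustrative values,
# assuming the 64-byte v1 index entry): stripping at rev == 2 truncates
# a non-inline index at 2 * 64 == 128 bytes and the data file at
# self.start(2); an inline revlog interleaves data and index in a
# single file, so it truncates at data_end + 2 * 64 instead.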
2782
2789
2783 def checksize(self):
2790 def checksize(self):
2784 """Check size of index and data files
2791 """Check size of index and data files
2785
2792
2786 Returns a (dd, di) tuple:
2793 Returns a (dd, di) tuple:
2787 - dd: extra bytes for the "data" file
2794 - dd: extra bytes for the "data" file
2788 - di: extra bytes for the "index" file
2795 - di: extra bytes for the "index" file
2789
2796
2790 A healthy revlog will return (0, 0).
2797 A healthy revlog will return (0, 0).
2791 """
2798 """
2792 expected = 0
2799 expected = 0
2793 if len(self):
2800 if len(self):
2794 expected = max(0, self.end(len(self) - 1))
2801 expected = max(0, self.end(len(self) - 1))
2795
2802
2796 try:
2803 try:
2797 with self._datafp() as f:
2804 with self._datafp() as f:
2798 f.seek(0, io.SEEK_END)
2805 f.seek(0, io.SEEK_END)
2799 actual = f.tell()
2806 actual = f.tell()
2800 dd = actual - expected
2807 dd = actual - expected
2801 except IOError as inst:
2808 except IOError as inst:
2802 if inst.errno != errno.ENOENT:
2809 if inst.errno != errno.ENOENT:
2803 raise
2810 raise
2804 dd = 0
2811 dd = 0
2805
2812
2806 try:
2813 try:
2807 f = self.opener(self._indexfile)
2814 f = self.opener(self._indexfile)
2808 f.seek(0, io.SEEK_END)
2815 f.seek(0, io.SEEK_END)
2809 actual = f.tell()
2816 actual = f.tell()
2810 f.close()
2817 f.close()
2811 s = self.index.entry_size
2818 s = self.index.entry_size
2812 i = max(0, actual // s)
2819 i = max(0, actual // s)
2813 di = actual - (i * s)
2820 di = actual - (i * s)
2814 if self._inline:
2821 if self._inline:
2815 databytes = 0
2822 databytes = 0
2816 for r in self:
2823 for r in self:
2817 databytes += max(0, self.length(r))
2824 databytes += max(0, self.length(r))
2818 dd = 0
2825 dd = 0
2819 di = actual - len(self) * s - databytes
2826 di = actual - len(self) * s - databytes
2820 except IOError as inst:
2827 except IOError as inst:
2821 if inst.errno != errno.ENOENT:
2828 if inst.errno != errno.ENOENT:
2822 raise
2829 raise
2823 di = 0
2830 di = 0
2824
2831
2825 return (dd, di)
2832 return (dd, di)
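# Minimal sketch of consuming checksize() (hypothetical `rl`); a clean
# revlog yields (0, 0), anything else points at trailing garbage:
#
#   dd, di = rl.checksize()
#   if dd or di:
#       pass  # files are longer than the index entries account for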
2826
2833
2827 def files(self):
2834 def files(self):
2828 res = [self._indexfile]
2835 res = [self._indexfile]
2829 if self._docket_file is None:
2836 if self._docket_file is None:
2830 if not self._inline:
2837 if not self._inline:
2831 res.append(self._datafile)
2838 res.append(self._datafile)
2832 else:
2839 else:
2833 res.append(self._docket_file)
2840 res.append(self._docket_file)
2834 res.extend(self._docket.old_index_filepaths(include_empty=False))
2841 res.extend(self._docket.old_index_filepaths(include_empty=False))
2835 if self._docket.data_end:
2842 if self._docket.data_end:
2836 res.append(self._datafile)
2843 res.append(self._datafile)
2837 res.extend(self._docket.old_data_filepaths(include_empty=False))
2844 res.extend(self._docket.old_data_filepaths(include_empty=False))
2838 if self._docket.sidedata_end:
2845 if self._docket.sidedata_end:
2839 res.append(self._sidedatafile)
2846 res.append(self._sidedatafile)
2840 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2847 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
2841 return res
2848 return res
2842
2849
2843 def emitrevisions(
2850 def emitrevisions(
2844 self,
2851 self,
2845 nodes,
2852 nodes,
2846 nodesorder=None,
2853 nodesorder=None,
2847 revisiondata=False,
2854 revisiondata=False,
2848 assumehaveparentrevisions=False,
2855 assumehaveparentrevisions=False,
2849 deltamode=repository.CG_DELTAMODE_STD,
2856 deltamode=repository.CG_DELTAMODE_STD,
2850 sidedata_helpers=None,
2857 sidedata_helpers=None,
2851 ):
2858 ):
2852 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2859 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2853 raise error.ProgrammingError(
2860 raise error.ProgrammingError(
2854 b'unhandled value for nodesorder: %s' % nodesorder
2861 b'unhandled value for nodesorder: %s' % nodesorder
2855 )
2862 )
2856
2863
2857 if nodesorder is None and not self._generaldelta:
2864 if nodesorder is None and not self._generaldelta:
2858 nodesorder = b'storage'
2865 nodesorder = b'storage'
2859
2866
2860 if (
2867 if (
2861 not self._storedeltachains
2868 not self._storedeltachains
2862 and deltamode != repository.CG_DELTAMODE_PREV
2869 and deltamode != repository.CG_DELTAMODE_PREV
2863 ):
2870 ):
2864 deltamode = repository.CG_DELTAMODE_FULL
2871 deltamode = repository.CG_DELTAMODE_FULL
2865
2872
2866 return storageutil.emitrevisions(
2873 return storageutil.emitrevisions(
2867 self,
2874 self,
2868 nodes,
2875 nodes,
2869 nodesorder,
2876 nodesorder,
2870 revlogrevisiondelta,
2877 revlogrevisiondelta,
2871 deltaparentfn=self.deltaparent,
2878 deltaparentfn=self.deltaparent,
2872 candeltafn=self.candelta,
2879 candeltafn=self.candelta,
2873 rawsizefn=self.rawsize,
2880 rawsizefn=self.rawsize,
2874 revdifffn=self.revdiff,
2881 revdifffn=self.revdiff,
2875 flagsfn=self.flags,
2882 flagsfn=self.flags,
2876 deltamode=deltamode,
2883 deltamode=deltamode,
2877 revisiondata=revisiondata,
2884 revisiondata=revisiondata,
2878 assumehaveparentrevisions=assumehaveparentrevisions,
2885 assumehaveparentrevisions=assumehaveparentrevisions,
2879 sidedata_helpers=sidedata_helpers,
2886 sidedata_helpers=sidedata_helpers,
2880 )
2887 )
2881
2888
2882 DELTAREUSEALWAYS = b'always'
2889 DELTAREUSEALWAYS = b'always'
2883 DELTAREUSESAMEREVS = b'samerevs'
2890 DELTAREUSESAMEREVS = b'samerevs'
2884 DELTAREUSENEVER = b'never'
2891 DELTAREUSENEVER = b'never'
2885
2892
2886 DELTAREUSEFULLADD = b'fulladd'
2893 DELTAREUSEFULLADD = b'fulladd'
2887
2894
2888 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2895 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2889
2896
2890 def clone(
2897 def clone(
2891 self,
2898 self,
2892 tr,
2899 tr,
2893 destrevlog,
2900 destrevlog,
2894 addrevisioncb=None,
2901 addrevisioncb=None,
2895 deltareuse=DELTAREUSESAMEREVS,
2902 deltareuse=DELTAREUSESAMEREVS,
2896 forcedeltabothparents=None,
2903 forcedeltabothparents=None,
2897 sidedata_helpers=None,
2904 sidedata_helpers=None,
2898 ):
2905 ):
2899 """Copy this revlog to another, possibly with format changes.
2906 """Copy this revlog to another, possibly with format changes.
2900
2907
2901 The destination revlog will contain the same revisions and nodes.
2908 The destination revlog will contain the same revisions and nodes.
2902 However, it may not be bit-for-bit identical due to e.g. delta encoding
2909 However, it may not be bit-for-bit identical due to e.g. delta encoding
2903 differences.
2910 differences.
2904
2911
2905 The ``deltareuse`` argument controls how deltas from the existing revlog
2912 The ``deltareuse`` argument controls how deltas from the existing revlog
2906 are preserved in the destination revlog. The argument can have the
2913 are preserved in the destination revlog. The argument can have the
2907 following values:
2914 following values:
2908
2915
2909 DELTAREUSEALWAYS
2916 DELTAREUSEALWAYS
2910 Deltas will always be reused (if possible), even if the destination
2917 Deltas will always be reused (if possible), even if the destination
2911 revlog would not select the same revisions for the delta. This is the
2918 revlog would not select the same revisions for the delta. This is the
2912 fastest mode of operation.
2919 fastest mode of operation.
2913 DELTAREUSESAMEREVS
2920 DELTAREUSESAMEREVS
2914 Deltas will be reused if the destination revlog would pick the same
2921 Deltas will be reused if the destination revlog would pick the same
2915 revisions for the delta. This mode strikes a balance between speed
2922 revisions for the delta. This mode strikes a balance between speed
2916 and optimization.
2923 and optimization.
2917 DELTAREUSENEVER
2924 DELTAREUSENEVER
2918 Deltas will never be reused. This is the slowest mode of execution.
2925 Deltas will never be reused. This is the slowest mode of execution.
2919 This mode can be used to recompute deltas (e.g. if the diff/delta
2926 This mode can be used to recompute deltas (e.g. if the diff/delta
2920 algorithm changes).
2927 algorithm changes).
2921 DELTAREUSEFULLADD
2928 DELTAREUSEFULLADD
2922 Revisions will be re-added as if they were new content. This is
2929 Revisions will be re-added as if they were new content. This is
2923 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2930 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2924 e.g. large file detection and handling.
2931 e.g. large file detection and handling.
2925
2932
2926 Delta computation can be slow, so the choice of delta reuse policy can
2933 Delta computation can be slow, so the choice of delta reuse policy can
2927 significantly affect run time.
2934 significantly affect run time.
2928
2935
2929 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2936 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2930 two extremes. Deltas will be reused when appropriate. But if a better
2937 two extremes. Deltas will be reused when appropriate. But if a better
2931 delta base could be chosen, the delta will be recomputed. This means if you
2938 delta base could be chosen, the delta will be recomputed. This means if you
2932 are converting a non-generaldelta revlog to a generaldelta revlog,
2939 are converting a non-generaldelta revlog to a generaldelta revlog,
2933 deltas will be recomputed if the delta's parent isn't a parent of the
2940 deltas will be recomputed if the delta's parent isn't a parent of the
2934 revision.
2941 revision.
2935
2942
2936 In addition to the delta policy, the ``forcedeltabothparents``
2943 In addition to the delta policy, the ``forcedeltabothparents``
2937 argument controls whether to force computing deltas against both parents
2944 argument controls whether to force computing deltas against both parents
2938 for merges. When unset, the destination revlog's existing setting is used.
2945 for merges. When unset, the destination revlog's existing setting is used.
2939
2946
2940 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2947 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2941 `sidedata_helpers`.
2948 `sidedata_helpers`.
2942 """
2949 """
2943 if deltareuse not in self.DELTAREUSEALL:
2950 if deltareuse not in self.DELTAREUSEALL:
2944 raise ValueError(
2951 raise ValueError(
2945 _(b'value for deltareuse invalid: %s') % deltareuse
2952 _(b'value for deltareuse invalid: %s') % deltareuse
2946 )
2953 )
2947
2954
2948 if len(destrevlog):
2955 if len(destrevlog):
2949 raise ValueError(_(b'destination revlog is not empty'))
2956 raise ValueError(_(b'destination revlog is not empty'))
2950
2957
2951 if getattr(self, 'filteredrevs', None):
2958 if getattr(self, 'filteredrevs', None):
2952 raise ValueError(_(b'source revlog has filtered revisions'))
2959 raise ValueError(_(b'source revlog has filtered revisions'))
2953 if getattr(destrevlog, 'filteredrevs', None):
2960 if getattr(destrevlog, 'filteredrevs', None):
2954 raise ValueError(_(b'destination revlog has filtered revisions'))
2961 raise ValueError(_(b'destination revlog has filtered revisions'))
2955
2962
2956 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2963 # lazydelta and lazydeltabase control whether to reuse a cached delta,
2957 # if possible.
2964 # if possible.
2958 oldlazydelta = destrevlog._lazydelta
2965 oldlazydelta = destrevlog._lazydelta
2959 oldlazydeltabase = destrevlog._lazydeltabase
2966 oldlazydeltabase = destrevlog._lazydeltabase
2960 oldamd = destrevlog._deltabothparents
2967 oldamd = destrevlog._deltabothparents
2961
2968
2962 try:
2969 try:
2963 if deltareuse == self.DELTAREUSEALWAYS:
2970 if deltareuse == self.DELTAREUSEALWAYS:
2964 destrevlog._lazydeltabase = True
2971 destrevlog._lazydeltabase = True
2965 destrevlog._lazydelta = True
2972 destrevlog._lazydelta = True
2966 elif deltareuse == self.DELTAREUSESAMEREVS:
2973 elif deltareuse == self.DELTAREUSESAMEREVS:
2967 destrevlog._lazydeltabase = False
2974 destrevlog._lazydeltabase = False
2968 destrevlog._lazydelta = True
2975 destrevlog._lazydelta = True
2969 elif deltareuse == self.DELTAREUSENEVER:
2976 elif deltareuse == self.DELTAREUSENEVER:
2970 destrevlog._lazydeltabase = False
2977 destrevlog._lazydeltabase = False
2971 destrevlog._lazydelta = False
2978 destrevlog._lazydelta = False
2972
2979
2973 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2980 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2974
2981
2975 self._clone(
2982 self._clone(
2976 tr,
2983 tr,
2977 destrevlog,
2984 destrevlog,
2978 addrevisioncb,
2985 addrevisioncb,
2979 deltareuse,
2986 deltareuse,
2980 forcedeltabothparents,
2987 forcedeltabothparents,
2981 sidedata_helpers,
2988 sidedata_helpers,
2982 )
2989 )
2983
2990
2984 finally:
2991 finally:
2985 destrevlog._lazydelta = oldlazydelta
2992 destrevlog._lazydelta = oldlazydelta
2986 destrevlog._lazydeltabase = oldlazydeltabase
2993 destrevlog._lazydeltabase = oldlazydeltabase
2987 destrevlog._deltabothparents = oldamd
2994 destrevlog._deltabothparents = oldamd
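# Usage sketch (illustrative, not part of this change): recompute every
# delta while copying, e.g. after changing the delta algorithm; `src`,
# `dst` and `tr` stand for a source revlog, an empty destination revlog
# and an active transaction.
#
#   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)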
2988
2995
2989 def _clone(
2996 def _clone(
2990 self,
2997 self,
2991 tr,
2998 tr,
2992 destrevlog,
2999 destrevlog,
2993 addrevisioncb,
3000 addrevisioncb,
2994 deltareuse,
3001 deltareuse,
2995 forcedeltabothparents,
3002 forcedeltabothparents,
2996 sidedata_helpers,
3003 sidedata_helpers,
2997 ):
3004 ):
2998 """perform the core duty of `revlog.clone` after parameter processing"""
3005 """perform the core duty of `revlog.clone` after parameter processing"""
2999 deltacomputer = deltautil.deltacomputer(destrevlog)
3006 deltacomputer = deltautil.deltacomputer(destrevlog)
3000 index = self.index
3007 index = self.index
3001 for rev in self:
3008 for rev in self:
3002 entry = index[rev]
3009 entry = index[rev]
3003
3010
3004 # Some classes override linkrev to take filtered revs into
3011 # Some classes override linkrev to take filtered revs into
3005 # account. Use raw entry from index.
3012 # account. Use raw entry from index.
3006 flags = entry[0] & 0xFFFF
3013 flags = entry[0] & 0xFFFF
3007 linkrev = entry[4]
3014 linkrev = entry[4]
3008 p1 = index[entry[5]][7]
3015 p1 = index[entry[5]][7]
3009 p2 = index[entry[6]][7]
3016 p2 = index[entry[6]][7]
3010 node = entry[7]
3017 node = entry[7]
3011
3018
3012 # (Possibly) reuse the delta from the revlog if allowed and
3019 # (Possibly) reuse the delta from the revlog if allowed and
3013 # the revlog chunk is a delta.
3020 # the revlog chunk is a delta.
3014 cachedelta = None
3021 cachedelta = None
3015 rawtext = None
3022 rawtext = None
3016 if deltareuse == self.DELTAREUSEFULLADD:
3023 if deltareuse == self.DELTAREUSEFULLADD:
3017 text = self._revisiondata(rev)
3024 text = self._revisiondata(rev)
3018 sidedata = self.sidedata(rev)
3025 sidedata = self.sidedata(rev)
3019
3026
3020 if sidedata_helpers is not None:
3027 if sidedata_helpers is not None:
3021 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3028 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3022 self, sidedata_helpers, sidedata, rev
3029 self, sidedata_helpers, sidedata, rev
3023 )
3030 )
3024 flags = flags | new_flags[0] & ~new_flags[1]
3031 flags = flags | new_flags[0] & ~new_flags[1]
3025
3032
3026 destrevlog.addrevision(
3033 destrevlog.addrevision(
3027 text,
3034 text,
3028 tr,
3035 tr,
3029 linkrev,
3036 linkrev,
3030 p1,
3037 p1,
3031 p2,
3038 p2,
3032 cachedelta=cachedelta,
3039 cachedelta=cachedelta,
3033 node=node,
3040 node=node,
3034 flags=flags,
3041 flags=flags,
3035 deltacomputer=deltacomputer,
3042 deltacomputer=deltacomputer,
3036 sidedata=sidedata,
3043 sidedata=sidedata,
3037 )
3044 )
3038 else:
3045 else:
3039 if destrevlog._lazydelta:
3046 if destrevlog._lazydelta:
3040 dp = self.deltaparent(rev)
3047 dp = self.deltaparent(rev)
3041 if dp != nullrev:
3048 if dp != nullrev:
3042 cachedelta = (dp, bytes(self._chunk(rev)))
3049 cachedelta = (dp, bytes(self._chunk(rev)))
3043
3050
3044 sidedata = None
3051 sidedata = None
3045 if not cachedelta:
3052 if not cachedelta:
3046 rawtext = self._revisiondata(rev)
3053 rawtext = self._revisiondata(rev)
3047 sidedata = self.sidedata(rev)
3054 sidedata = self.sidedata(rev)
3048 if sidedata is None:
3055 if sidedata is None:
3049 sidedata = self.sidedata(rev)
3056 sidedata = self.sidedata(rev)
3050
3057
3051 if sidedata_helpers is not None:
3058 if sidedata_helpers is not None:
3052 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3059 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3053 self, sidedata_helpers, sidedata, rev
3060 self, sidedata_helpers, sidedata, rev
3054 )
3061 )
3055 flags = flags | new_flags[0] & ~new_flags[1]
3062 flags = flags | new_flags[0] & ~new_flags[1]
3056
3063
3057 with destrevlog._writing(tr):
3064 with destrevlog._writing(tr):
3058 destrevlog._addrevision(
3065 destrevlog._addrevision(
3059 node,
3066 node,
3060 rawtext,
3067 rawtext,
3061 tr,
3068 tr,
3062 linkrev,
3069 linkrev,
3063 p1,
3070 p1,
3064 p2,
3071 p2,
3065 flags,
3072 flags,
3066 cachedelta,
3073 cachedelta,
3067 deltacomputer=deltacomputer,
3074 deltacomputer=deltacomputer,
3068 sidedata=sidedata,
3075 sidedata=sidedata,
3069 )
3076 )
3070
3077
3071 if addrevisioncb:
3078 if addrevisioncb:
3072 addrevisioncb(self, rev, node)
3079 addrevisioncb(self, rev, node)
3073
3080
3074 def censorrevision(self, tr, censornode, tombstone=b''):
3081 def censorrevision(self, tr, censornode, tombstone=b''):
3075 if self._format_version == REVLOGV0:
3082 if self._format_version == REVLOGV0:
3076 raise error.RevlogError(
3083 raise error.RevlogError(
3077 _(b'cannot censor with version %d revlogs')
3084 _(b'cannot censor with version %d revlogs')
3078 % self._format_version
3085 % self._format_version
3079 )
3086 )
3080 elif self._format_version == REVLOGV1:
3087 elif self._format_version == REVLOGV1:
3081 rewrite.v1_censor(self, tr, censornode, tombstone)
3088 rewrite.v1_censor(self, tr, censornode, tombstone)
3082 else:
3089 else:
3083 rewrite.v2_censor(self, tr, censornode, tombstone)
3090 rewrite.v2_censor(self, tr, censornode, tombstone)
3084
3091
3085 def verifyintegrity(self, state):
3092 def verifyintegrity(self, state):
3086 """Verifies the integrity of the revlog.
3093 """Verifies the integrity of the revlog.
3087
3094
3088 Yields ``revlogproblem`` instances describing problems that are
3095 Yields ``revlogproblem`` instances describing problems that are
3089 found.
3096 found.
3090 """
3097 """
3091 dd, di = self.checksize()
3098 dd, di = self.checksize()
3092 if dd:
3099 if dd:
3093 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3100 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3094 if di:
3101 if di:
3095 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3102 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3096
3103
3097 version = self._format_version
3104 version = self._format_version
3098
3105
3099 # The verifier tells us what version revlog we should be.
3106 # The verifier tells us what version revlog we should be.
3100 if version != state[b'expectedversion']:
3107 if version != state[b'expectedversion']:
3101 yield revlogproblem(
3108 yield revlogproblem(
3102 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3109 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3103 % (self.display_id, version, state[b'expectedversion'])
3110 % (self.display_id, version, state[b'expectedversion'])
3104 )
3111 )
3105
3112
3106 state[b'skipread'] = set()
3113 state[b'skipread'] = set()
3107 state[b'safe_renamed'] = set()
3114 state[b'safe_renamed'] = set()
3108
3115
3109 for rev in self:
3116 for rev in self:
3110 node = self.node(rev)
3117 node = self.node(rev)
3111
3118
3112 # Verify contents. 4 cases to care about:
3119 # Verify contents. 4 cases to care about:
3113 #
3120 #
3114 # common: the most common case
3121 # common: the most common case
3115 # rename: with a rename
3122 # rename: with a rename
3116 # meta: file content starts with b'\1\n', the metadata
3123 # meta: file content starts with b'\1\n', the metadata
3117 # header defined in filelog.py, but without a rename
3124 # header defined in filelog.py, but without a rename
3118 # ext: content stored externally
3125 # ext: content stored externally
3119 #
3126 #
3120 # More formally, their differences are shown below:
3127 # More formally, their differences are shown below:
3121 #
3128 #
3122 # | common | rename | meta | ext
3129 # | common | rename | meta | ext
3123 # -------------------------------------------------------
3130 # -------------------------------------------------------
3124 # flags() | 0 | 0 | 0 | not 0
3131 # flags() | 0 | 0 | 0 | not 0
3125 # renamed() | False | True | False | ?
3132 # renamed() | False | True | False | ?
3126 # rawtext[0:2]=='\1\n'| False | True | True | ?
3133 # rawtext[0:2]=='\1\n'| False | True | True | ?
3127 #
3134 #
3128 # "rawtext" means the raw text stored in revlog data, which
3135 # "rawtext" means the raw text stored in revlog data, which
3129 # could be retrieved by "rawdata(rev)". "text"
3136 # could be retrieved by "rawdata(rev)". "text"
3130 # mentioned below is "revision(rev)".
3137 # mentioned below is "revision(rev)".
3131 #
3138 #
3132 # There are 3 different lengths stored physically:
3139 # There are 3 different lengths stored physically:
3133 # 1. L1: rawsize, stored in revlog index
3140 # 1. L1: rawsize, stored in revlog index
3134 # 2. L2: len(rawtext), stored in revlog data
3141 # 2. L2: len(rawtext), stored in revlog data
3135 # 3. L3: len(text), stored in revlog data if flags==0, or
3142 # 3. L3: len(text), stored in revlog data if flags==0, or
3136 # possibly somewhere else if flags!=0
3143 # possibly somewhere else if flags!=0
3137 #
3144 #
3138 # L1 should be equal to L2. L3 could be different from them.
3145 # L1 should be equal to L2. L3 could be different from them.
3139 # "text" may or may not affect commit hash depending on flag
3146 # "text" may or may not affect commit hash depending on flag
3140 # processors (see flagutil.addflagprocessor).
3147 # processors (see flagutil.addflagprocessor).
3141 #
3148 #
3142 # | common | rename | meta | ext
3149 # | common | rename | meta | ext
3143 # -------------------------------------------------
3150 # -------------------------------------------------
3144 # rawsize() | L1 | L1 | L1 | L1
3151 # rawsize() | L1 | L1 | L1 | L1
3145 # size() | L1 | L2-LM | L1(*) | L1 (?)
3152 # size() | L1 | L2-LM | L1(*) | L1 (?)
3146 # len(rawtext) | L2 | L2 | L2 | L2
3153 # len(rawtext) | L2 | L2 | L2 | L2
3147 # len(text) | L2 | L2 | L2 | L3
3154 # len(text) | L2 | L2 | L2 | L3
3148 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3155 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3149 #
3156 #
3150 # LM: length of metadata, depending on rawtext
3157 # LM: length of metadata, depending on rawtext
3151 # (*): not ideal, see comment in filelog.size
3158 # (*): not ideal, see comment in filelog.size
3152 # (?): could be "- len(meta)" if the resolved content has
3159 # (?): could be "- len(meta)" if the resolved content has
3153 # rename metadata
3160 # rename metadata
3154 #
3161 #
3155 # Checks needed to be done:
3162 # Checks needed to be done:
3156 # 1. length check: L1 == L2, in all cases.
3163 # 1. length check: L1 == L2, in all cases.
3157 # 2. hash check: depending on flag processor, we may need to
3164 # 2. hash check: depending on flag processor, we may need to
3158 # use either "text" (external), or "rawtext" (in revlog).
3165 # use either "text" (external), or "rawtext" (in revlog).
3159
3166
3160 try:
3167 try:
3161 skipflags = state.get(b'skipflags', 0)
3168 skipflags = state.get(b'skipflags', 0)
3162 if skipflags:
3169 if skipflags:
3163 skipflags &= self.flags(rev)
3170 skipflags &= self.flags(rev)
3164
3171
3165 _verify_revision(self, skipflags, state, node)
3172 _verify_revision(self, skipflags, state, node)
3166
3173
3167 l1 = self.rawsize(rev)
3174 l1 = self.rawsize(rev)
3168 l2 = len(self.rawdata(node))
3175 l2 = len(self.rawdata(node))
3169
3176
3170 if l1 != l2:
3177 if l1 != l2:
3171 yield revlogproblem(
3178 yield revlogproblem(
3172 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3179 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3173 node=node,
3180 node=node,
3174 )
3181 )
3175
3182
3176 except error.CensoredNodeError:
3183 except error.CensoredNodeError:
3177 if state[b'erroroncensored']:
3184 if state[b'erroroncensored']:
3178 yield revlogproblem(
3185 yield revlogproblem(
3179 error=_(b'censored file data'), node=node
3186 error=_(b'censored file data'), node=node
3180 )
3187 )
3181 state[b'skipread'].add(node)
3188 state[b'skipread'].add(node)
3182 except Exception as e:
3189 except Exception as e:
3183 yield revlogproblem(
3190 yield revlogproblem(
3184 error=_(b'unpacking %s: %s')
3191 error=_(b'unpacking %s: %s')
3185 % (short(node), stringutil.forcebytestr(e)),
3192 % (short(node), stringutil.forcebytestr(e)),
3186 node=node,
3193 node=node,
3187 )
3194 )
3188 state[b'skipread'].add(node)
3195 state[b'skipread'].add(node)
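# Sketch of consuming the generator above (hypothetical `rl`, `state`
# and `ui`); each yielded revlogproblem carries `warning`, `error` and
# `node` attributes:
#
#   for problem in rl.verifyintegrity(state):
#       ui.warn((problem.error or problem.warning) + b'\n')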
3189
3196
3190 def storageinfo(
3197 def storageinfo(
3191 self,
3198 self,
3192 exclusivefiles=False,
3199 exclusivefiles=False,
3193 sharedfiles=False,
3200 sharedfiles=False,
3194 revisionscount=False,
3201 revisionscount=False,
3195 trackedsize=False,
3202 trackedsize=False,
3196 storedsize=False,
3203 storedsize=False,
3197 ):
3204 ):
3198 d = {}
3205 d = {}
3199
3206
3200 if exclusivefiles:
3207 if exclusivefiles:
3201 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3208 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3202 if not self._inline:
3209 if not self._inline:
3203 d[b'exclusivefiles'].append((self.opener, self._datafile))
3210 d[b'exclusivefiles'].append((self.opener, self._datafile))
3204
3211
3205 if sharedfiles:
3212 if sharedfiles:
3206 d[b'sharedfiles'] = []
3213 d[b'sharedfiles'] = []
3207
3214
3208 if revisionscount:
3215 if revisionscount:
3209 d[b'revisionscount'] = len(self)
3216 d[b'revisionscount'] = len(self)
3210
3217
3211 if trackedsize:
3218 if trackedsize:
3212 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3219 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3213
3220
3214 if storedsize:
3221 if storedsize:
3215 d[b'storedsize'] = sum(
3222 d[b'storedsize'] = sum(
3216 self.opener.stat(path).st_size for path in self.files()
3223 self.opener.stat(path).st_size for path in self.files()
3217 )
3224 )
3218
3225
3219 return d
3226 return d
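# Illustrative call (hypothetical `rl`): each keyword opts into one
# piece of information, e.g.
#
#   rl.storageinfo(revisionscount=True, storedsize=True)
#   # -> {b'revisionscount': len(rl), b'storedsize': <bytes on disk>}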
3220
3227
3221 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3228 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3222 if not self.hassidedata:
3229 if not self.hassidedata:
3223 return
3230 return
3224 # revlog formats with sidedata support do not support inline
3231 # revlog formats with sidedata support do not support inline
3225 assert not self._inline
3232 assert not self._inline
3226 if not helpers[1] and not helpers[2]:
3233 if not helpers[1] and not helpers[2]:
3227 # Nothing to generate or remove
3234 # Nothing to generate or remove
3228 return
3235 return
3229
3236
3230 new_entries = []
3237 new_entries = []
3231 # append the new sidedata
3238 # append the new sidedata
3232 with self._writing(transaction):
3239 with self._writing(transaction):
3233 ifh, dfh, sdfh = self._writinghandles
3240 ifh, dfh, sdfh = self._writinghandles
3234 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3241 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3235
3242
3236 current_offset = sdfh.tell()
3243 current_offset = sdfh.tell()
3237 for rev in range(startrev, endrev + 1):
3244 for rev in range(startrev, endrev + 1):
3238 entry = self.index[rev]
3245 entry = self.index[rev]
3239 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3246 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3240 store=self,
3247 store=self,
3241 sidedata_helpers=helpers,
3248 sidedata_helpers=helpers,
3242 sidedata={},
3249 sidedata={},
3243 rev=rev,
3250 rev=rev,
3244 )
3251 )
3245
3252
3246 serialized_sidedata = sidedatautil.serialize_sidedata(
3253 serialized_sidedata = sidedatautil.serialize_sidedata(
3247 new_sidedata
3254 new_sidedata
3248 )
3255 )
3249
3256
3250 sidedata_compression_mode = COMP_MODE_INLINE
3257 sidedata_compression_mode = COMP_MODE_INLINE
3251 if serialized_sidedata and self.hassidedata:
3258 if serialized_sidedata and self.hassidedata:
3252 sidedata_compression_mode = COMP_MODE_PLAIN
3259 sidedata_compression_mode = COMP_MODE_PLAIN
3253 h, comp_sidedata = self.compress(serialized_sidedata)
3260 h, comp_sidedata = self.compress(serialized_sidedata)
3254 if (
3261 if (
3255 h != b'u'
3262 h != b'u'
3256 and comp_sidedata[0] != b'\0'
3263 and comp_sidedata[0] != b'\0'
3257 and len(comp_sidedata) < len(serialized_sidedata)
3264 and len(comp_sidedata) < len(serialized_sidedata)
3258 ):
3265 ):
3259 assert not h
3266 assert not h
3260 if (
3267 if (
3261 comp_sidedata[0]
3268 comp_sidedata[0]
3262 == self._docket.default_compression_header
3269 == self._docket.default_compression_header
3263 ):
3270 ):
3264 sidedata_compression_mode = COMP_MODE_DEFAULT
3271 sidedata_compression_mode = COMP_MODE_DEFAULT
3265 serialized_sidedata = comp_sidedata
3272 serialized_sidedata = comp_sidedata
3266 else:
3273 else:
3267 sidedata_compression_mode = COMP_MODE_INLINE
3274 sidedata_compression_mode = COMP_MODE_INLINE
3268 serialized_sidedata = comp_sidedata
3275 serialized_sidedata = comp_sidedata
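# Summary of the branch above: sidedata stays uncompressed
# (COMP_MODE_PLAIN) unless compression actually shrinks it; a
# compressed chunk starting with the docket's default compression
# header needs no per-chunk header parsing (COMP_MODE_DEFAULT), while
# anything else keeps a self-describing header in the chunk
# (COMP_MODE_INLINE).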
3269 if entry[8] != 0 or entry[9] != 0:
3276 if entry[8] != 0 or entry[9] != 0:
3270 # rewriting entries that already have sidedata is not
3277 # rewriting entries that already have sidedata is not
3271 # supported yet, because it introduces garbage data in the
3278 # supported yet, because it introduces garbage data in the
3272 # revlog.
3279 # revlog.
3273 msg = b"rewriting existing sidedata is not supported yet"
3280 msg = b"rewriting existing sidedata is not supported yet"
3274 raise error.Abort(msg)
3281 raise error.Abort(msg)
3275
3282
3276 # Apply (potential) flags to add and to remove after running
3283 # Apply (potential) flags to add and to remove after running
3277 # the sidedata helpers
3284 # the sidedata helpers
3278 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3285 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3279 entry_update = (
3286 entry_update = (
3280 current_offset,
3287 current_offset,
3281 len(serialized_sidedata),
3288 len(serialized_sidedata),
3282 new_offset_flags,
3289 new_offset_flags,
3283 sidedata_compression_mode,
3290 sidedata_compression_mode,
3284 )
3291 )
3285
3292
3286 # the sidedata computation might have moved the file cursors around
3293 # the sidedata computation might have moved the file cursors around
3287 sdfh.seek(current_offset, os.SEEK_SET)
3294 sdfh.seek(current_offset, os.SEEK_SET)
3288 sdfh.write(serialized_sidedata)
3295 sdfh.write(serialized_sidedata)
3289 new_entries.append(entry_update)
3296 new_entries.append(entry_update)
3290 current_offset += len(serialized_sidedata)
3297 current_offset += len(serialized_sidedata)
3291 self._docket.sidedata_end = sdfh.tell()
3298 self._docket.sidedata_end = sdfh.tell()
3292
3299
3293 # rewrite the new index entries
3300 # rewrite the new index entries
3294 ifh.seek(startrev * self.index.entry_size)
3301 ifh.seek(startrev * self.index.entry_size)
3295 for i, e in enumerate(new_entries):
3302 for i, e in enumerate(new_entries):
3296 rev = startrev + i
3303 rev = startrev + i
3297 self.index.replace_sidedata_info(rev, *e)
3304 self.index.replace_sidedata_info(rev, *e)
3298 packed = self.index.entry_binary(rev)
3305 packed = self.index.entry_binary(rev)
3299 if rev == 0 and self._docket is None:
3306 if rev == 0 and self._docket is None:
3300 header = self._format_flags | self._format_version
3307 header = self._format_flags | self._format_version
3301 header = self.index.pack_header(header)
3308 header = self.index.pack_header(header)
3302 packed = header + packed
3309 packed = header + packed
3303 ifh.write(packed)
3310 ifh.write(packed)
@@ -1,194 +1,178 @@
1 Test correctness of revlog inline -> non-inline transition
1 Test correctness of revlog inline -> non-inline transition
2 ----------------------------------------------------------
2 ----------------------------------------------------------
3
3
4 Helper extension to intercept renames.
4 Helper extension to intercept renames.
5
5
6 $ cat > $TESTTMP/intercept_rename.py << EOF
6 $ cat > $TESTTMP/intercept_rename.py << EOF
7 > import os
7 > import os
8 > import sys
8 > import sys
9 > from mercurial import extensions, util
9 > from mercurial import extensions, util
10 >
10 >
11 > def extsetup(ui):
11 > def extsetup(ui):
12 > def close(orig, *args, **kwargs):
12 > def close(orig, *args, **kwargs):
13 > path = util.normpath(args[0]._atomictempfile__name)
13 > path = util.normpath(args[0]._atomictempfile__name)
14 > if path.endswith(b'/.hg/store/data/file.i'):
14 > if path.endswith(b'/.hg/store/data/file.i'):
15 > os._exit(80)
15 > os._exit(80)
16 > return orig(*args, **kwargs)
16 > return orig(*args, **kwargs)
17 > extensions.wrapfunction(util.atomictempfile, 'close', close)
17 > extensions.wrapfunction(util.atomictempfile, 'close', close)
18 > EOF
18 > EOF
19
19
20 Test offset computation to correctly factor in the index entries themselves.
20 Test offset computation to correctly factor in the index entries themselves.
21 Also test that the new data file has the correct size if the transaction is aborted
21 Also test that the new data file has the correct size if the transaction is aborted
22 after the index has been replaced.
22 after the index has been replaced.
23
23
24 Test repo has commits a, b, c, D, where D is large (grows the revlog enough that it
24 Test repo has commits a, b, c, D, where D is large (grows the revlog enough that it
25 transitions to non-inline storage). The clone initially has changes a, b
25 transitions to non-inline storage). The clone initially has changes a, b
26 and will transition to non-inline storage when adding c, D.
26 and will transition to non-inline storage when adding c, D.
27
27
28 If the transaction adding c, D is rolled back, then we don't undo the revlog split,
28 If the transaction adding c, D is rolled back, then we don't undo the revlog split,
29 but truncate the index and the data to remove both c and D.
29 but truncate the index and the data to remove both c and D.
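After a correct rollback, file.i should shrink back to the two surviving
index entries (2 entries * 64 bytes == 128 bytes with the v1 index format);
the buggy partial abort instead left a stale third entry behind (192 bytes),
which is what the updated expectations below check.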
30
30
31 $ hg init troffset-computation --config format.revlog-compression=none
31 $ hg init troffset-computation --config format.revlog-compression=none
32 $ cd troffset-computation
32 $ cd troffset-computation
33 $ printf '%20d' '1' > file
33 $ printf '%20d' '1' > file
34 $ hg commit -Aqma
34 $ hg commit -Aqma
35 $ printf '%1024d' '1' > file
35 $ printf '%1024d' '1' > file
36 $ hg commit -Aqmb
36 $ hg commit -Aqmb
37 $ printf '%20d' '1' > file
37 $ printf '%20d' '1' > file
38 $ hg commit -Aqmc
38 $ hg commit -Aqmc
39 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
39 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
40 $ hg commit -AqmD
40 $ hg commit -AqmD
41
41
42 $ cd ..
42 $ cd ..
43
43
44 $ hg clone -r 1 troffset-computation troffset-computation-copy --config format.revlog-compression=none -q
44 $ hg clone -r 1 troffset-computation troffset-computation-copy --config format.revlog-compression=none -q
45 $ cd troffset-computation-copy
45 $ cd troffset-computation-copy
46
46
47 Reference size:
47 Reference size:
48
48
49 $ f -s .hg/store/data/file*
49 $ f -s .hg/store/data/file*
50 .hg/store/data/file.i: size=1174
50 .hg/store/data/file.i: size=1174
51
51
52 $ cat > .hg/hgrc <<EOF
52 $ cat > .hg/hgrc <<EOF
53 > [hooks]
53 > [hooks]
54 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
54 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
55 > EOF
55 > EOF
56 #if chg
56 #if chg
57 $ hg pull ../troffset-computation
57 $ hg pull ../troffset-computation
58 pulling from ../troffset-computation
58 pulling from ../troffset-computation
59 [255]
59 [255]
60 #else
60 #else
61 $ hg pull ../troffset-computation
61 $ hg pull ../troffset-computation
62 pulling from ../troffset-computation
62 pulling from ../troffset-computation
63 [80]
63 [80]
64 #endif
64 #endif
65 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
65 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
66 data/file.i 192
66 data/file.i 128
67
67
68 The first file.i entry should match the "Reference size" above.
68 The first file.i entry should match the "Reference size" above.
69 The first file.d entry is the temporary record during the split,
69 The first file.d entry is the temporary record during the split,
70 the second entry is from after the split. The sum of the second file.d
70 the second entry is from after the split. The sum of the second file.d
71 and the second file.i entry should match the first file.i entry.
71 and the second file.i entry should match the first file.i entry.
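With the sizes recorded below this works out to 1046 (file.d) + 128
(file.i) == 1174 bytes, i.e. the pre-split inline file.i size.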
72
72
73 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
73 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
74 data/file.i 1174
74 data/file.i 1174
75 data/file.d 0
75 data/file.d 0
76 data/file.d 1067
76 data/file.d 1046
77 data/file.i 192
77 data/file.i 128
78 $ hg recover
78 $ hg recover
79 rolling back interrupted transaction
79 rolling back interrupted transaction
80 (verify step skipped, run `hg verify` to check your repository content)
80 (verify step skipped, run `hg verify` to check your repository content)
81 $ f -s .hg/store/data/file*
81 $ f -s .hg/store/data/file*
82 .hg/store/data/file.d: size=1067
82 .hg/store/data/file.d: size=1046
83 .hg/store/data/file.i: size=192
83 .hg/store/data/file.i: size=128
84 $ hg tip
84 $ hg tip
85 changeset: 1:cfa8d6e60429
85 changeset: 1:cfa8d6e60429
86 tag: tip
86 tag: tip
87 user: test
87 user: test
88 date: Thu Jan 01 00:00:00 1970 +0000
88 date: Thu Jan 01 00:00:00 1970 +0000
89 summary: b
89 summary: b
90
90
91 $ hg verify -q
91 $ hg verify -q
92 warning: revlog 'data/file.d' not in fncache!
92 warning: revlog 'data/file.d' not in fncache!
93 file@?: rev 2 points to nonexistent changeset 2
93 1 warnings encountered!
94 (expected )
95 file@?: fa1120531cc1 not in manifests
96 2 warnings encountered!
97 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
94 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
98 2 integrity errors encountered!
99 [1]
100 $ hg debugrebuildfncache --only-data
95 $ hg debugrebuildfncache --only-data
101 adding data/file.d
96 adding data/file.d
102 1 items added, 0 removed from fncache
97 1 items added, 0 removed from fncache
103 $ hg verify -q
98 $ hg verify -q
104 file@?: rev 2 points to nonexistent changeset 2
105 (expected )
106 file@?: fa1120531cc1 not in manifests
107 1 warnings encountered!
108 2 integrity errors encountered!
109 [1]
110 $ cd ..
99 $ cd ..
111
100
112
101
113 Now retry the procedure but intercept the rename of the index and check that
102 Now retry the procedure but intercept the rename of the index and check that
114 the journal does not contain the new index size. This demonstrates the edge case
103 the journal does not contain the new index size. This demonstrates the edge case
115 where the data file is left as garbage.
104 where the data file is left as garbage.
116
105
117 $ hg clone -r 1 troffset-computation troffset-computation-copy2 --config format.revlog-compression=none -q
106 $ hg clone -r 1 troffset-computation troffset-computation-copy2 --config format.revlog-compression=none -q
118 $ cd troffset-computation-copy2
107 $ cd troffset-computation-copy2
119 $ cat > .hg/hgrc <<EOF
108 $ cat > .hg/hgrc <<EOF
120 > [extensions]
109 > [extensions]
121 > intercept_rename = $TESTTMP/intercept_rename.py
110 > intercept_rename = $TESTTMP/intercept_rename.py
122 > [hooks]
111 > [hooks]
123 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
112 > pretxnchangegroup = python:$TESTDIR/helper-killhook.py:killme
124 > EOF
113 > EOF
125 #if chg
114 #if chg
126 $ hg pull ../troffset-computation
115 $ hg pull ../troffset-computation
127 pulling from ../troffset-computation
116 pulling from ../troffset-computation
128 [255]
117 [255]
129 #else
118 #else
130 $ hg pull ../troffset-computation
119 $ hg pull ../troffset-computation
131 pulling from ../troffset-computation
120 pulling from ../troffset-computation
132 [80]
121 [80]
133 #endif
122 #endif
134 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
123 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
135 data/file.i 1174
124 data/file.i 1174
136 data/file.d 0
125 data/file.d 0
137 data/file.d 1067
126 data/file.d 1046
138
127
139 $ hg recover
128 $ hg recover
140 rolling back interrupted transaction
129 rolling back interrupted transaction
141 (verify step skipped, run `hg verify` to check your repository content)
130 (verify step skipped, run `hg verify` to check your repository content)
142 $ f -s .hg/store/data/file*
131 $ f -s .hg/store/data/file*
143 .hg/store/data/file.d: size=1067
132 .hg/store/data/file.d: size=1046
144 .hg/store/data/file.i: size=1174
133 .hg/store/data/file.i: size=1174
145 $ hg tip
134 $ hg tip
146 changeset: 1:cfa8d6e60429
135 changeset: 1:cfa8d6e60429
147 tag: tip
136 tag: tip
148 user: test
137 user: test
149 date: Thu Jan 01 00:00:00 1970 +0000
138 date: Thu Jan 01 00:00:00 1970 +0000
150 summary: b
139 summary: b
151
140
152 $ hg verify -q
141 $ hg verify -q
153 $ cd ..
142 $ cd ..
154
143
155
144
156 Repeat the original test but let hg rollback the transaction.
145 Repeat the original test but let hg rollback the transaction.
157
146
158 $ hg clone -r 1 troffset-computation troffset-computation-copy-rb --config format.revlog-compression=none -q
147 $ hg clone -r 1 troffset-computation troffset-computation-copy-rb --config format.revlog-compression=none -q
159 $ cd troffset-computation-copy-rb
148 $ cd troffset-computation-copy-rb
160 $ cat > .hg/hgrc <<EOF
149 $ cat > .hg/hgrc <<EOF
161 > [hooks]
150 > [hooks]
162 > pretxnchangegroup = false
151 > pretxnchangegroup = false
163 > EOF
152 > EOF
164 $ hg pull ../troffset-computation
153 $ hg pull ../troffset-computation
165 pulling from ../troffset-computation
154 pulling from ../troffset-computation
166 searching for changes
155 searching for changes
167 adding changesets
156 adding changesets
168 adding manifests
157 adding manifests
169 adding file changes
158 adding file changes
170 transaction abort!
159 transaction abort!
171 rollback completed
160 rollback completed
172 abort: pretxnchangegroup hook exited with status 1
161 abort: pretxnchangegroup hook exited with status 1
173 [40]
162 [40]
174 $ f -s .hg/store/data/file*
163 $ f -s .hg/store/data/file*
175 .hg/store/data/file.d: size=1067
164 .hg/store/data/file.d: size=1046
176 .hg/store/data/file.i: size=192
165 .hg/store/data/file.i: size=128
177 $ hg tip
166 $ hg tip
178 changeset: 1:cfa8d6e60429
167 changeset: 1:cfa8d6e60429
179 tag: tip
168 tag: tip
180 user: test
169 user: test
181 date: Thu Jan 01 00:00:00 1970 +0000
170 date: Thu Jan 01 00:00:00 1970 +0000
182 summary: b
171 summary: b
183
172
184 $ hg verify -q
173 $ hg verify -q
185 warning: revlog 'data/file.d' not in fncache!
174 warning: revlog 'data/file.d' not in fncache!
186 file@?: rev 2 points to nonexistent changeset 2
175 1 warnings encountered!
187 (expected )
188 file@?: fa1120531cc1 not in manifests
189 2 warnings encountered!
190 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
176 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
191 2 integrity errors encountered!
192 [1]
193 $ cd ..
177 $ cd ..
194
178