revlog: preindent some code in _enforceinlinesize...
marmoute
r47987:100f061d default
@@ -1,3192 +1,3193 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

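# Illustrative sketch, not part of revlog.py: a flag processor is a
# (read, write, raw) triple like ``ellipsisprocessor`` above. ``read``
# turns stored rawtext into usable text and ``write`` does the reverse;
# both return the text plus a bool used for hash validation, while
# ``raw`` only returns that bool. A hypothetical extension could
# register such a triple roughly like this (``REVIDX_EXTSTORED`` merely
# stands in for a real flag bit here):
#
#   example_processor = (
#       ellipsisreadprocessor,   # read: rawtext -> text
#       ellipsiswriteprocessor,  # write: text -> rawtext
#       ellipsisrawprocessor,    # raw: validate rawtext
#   )
#   flagutil.insertflagprocessor(
#       REVIDX_EXTSTORED, example_processor, flagutil.flagprocessors
#   )
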
def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)

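# Illustrative sketch, not part of revlog.py: offset_type() packs a data
# file offset and per-revision flags into the single integer stored as
# field 0 of an index entry; start() and flags() further down perform
# the inverse operations.
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   assert packed >> 16 == 4096                   # recovered by start()
#   assert packed & 0xFFFF == REVIDX_ISCENSORED   # recovered by flags()
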
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)

# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

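    # Illustrative sketch, not part of revlog.py: a minimal
    # instantiation, assuming ``opener`` is a vfs-style callable and
    # ``kind`` is one of the ALL_KINDS constants (both names are
    # assumptions of this example):
    #
    #   rl = revlog(
    #       opener,                     # abstracts file opening
    #       target=(kind, b'foo.txt'),  # (KIND, ID) content identity
    #       radix=b'data/foo.txt',      # index file is radix + '.i'
    #   )
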
    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set it to an accurate value.
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315
315
316 self.radix = radix
316 self.radix = radix
317
317
318 self._indexfile = None
318 self._indexfile = None
319 self._datafile = None
319 self._datafile = None
320 self._nodemap_file = None
320 self._nodemap_file = None
321 self.postfix = postfix
321 self.postfix = postfix
322 self.opener = opener
322 self.opener = opener
323 if persistentnodemap:
323 if persistentnodemap:
324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325
325
326 assert target[0] in ALL_KINDS
326 assert target[0] in ALL_KINDS
327 assert len(target) == 2
327 assert len(target) == 2
328 self.target = target
328 self.target = target
329 # When True, indexfile is opened with checkambig=True at writing, to
329 # When True, indexfile is opened with checkambig=True at writing, to
330 # avoid file stat ambiguity.
330 # avoid file stat ambiguity.
331 self._checkambig = checkambig
331 self._checkambig = checkambig
332 self._mmaplargeindex = mmaplargeindex
332 self._mmaplargeindex = mmaplargeindex
333 self._censorable = censorable
333 self._censorable = censorable
334 # 3-tuple of (node, rev, text) for a raw revision.
334 # 3-tuple of (node, rev, text) for a raw revision.
335 self._revisioncache = None
335 self._revisioncache = None
336 # Maps rev to chain base rev.
336 # Maps rev to chain base rev.
337 self._chainbasecache = util.lrucachedict(100)
337 self._chainbasecache = util.lrucachedict(100)
338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 self._chunkcache = (0, b'')
339 self._chunkcache = (0, b'')
340 # How much data to read and cache into the raw revlog data cache.
340 # How much data to read and cache into the raw revlog data cache.
341 self._chunkcachesize = 65536
341 self._chunkcachesize = 65536
342 self._maxchainlen = None
342 self._maxchainlen = None
343 self._deltabothparents = True
343 self._deltabothparents = True
344 self.index = None
344 self.index = None
345 self._nodemap_docket = None
345 self._nodemap_docket = None
346 # Mapping of partial identifiers to full nodes.
346 # Mapping of partial identifiers to full nodes.
347 self._pcache = {}
347 self._pcache = {}
348 # Mapping of revision integer to full node.
348 # Mapping of revision integer to full node.
349 self._compengine = b'zlib'
349 self._compengine = b'zlib'
350 self._compengineopts = {}
350 self._compengineopts = {}
351 self._maxdeltachainspan = -1
351 self._maxdeltachainspan = -1
352 self._withsparseread = False
352 self._withsparseread = False
353 self._sparserevlog = False
353 self._sparserevlog = False
354 self._srdensitythreshold = 0.50
354 self._srdensitythreshold = 0.50
355 self._srmingapsize = 262144
355 self._srmingapsize = 262144
356
356
357 # Make copy of flag processors so each revlog instance can support
357 # Make copy of flag processors so each revlog instance can support
358 # custom flags.
358 # custom flags.
359 self._flagprocessors = dict(flagutil.flagprocessors)
359 self._flagprocessors = dict(flagutil.flagprocessors)
360
360
361 # 2-tuple of file handles being used for active writing.
361 # 2-tuple of file handles being used for active writing.
362 self._writinghandles = None
362 self._writinghandles = None
363
363
364 self._loadindex()
364 self._loadindex()
365
365
366 self._concurrencychecker = concurrencychecker
366 self._concurrencychecker = concurrencychecker
367
367
368 def _init_opts(self):
368 def _init_opts(self):
369 """process options (from above/config) to setup associated default revlog mode
369 """process options (from above/config) to setup associated default revlog mode
370
370
371 These values might be affected when actually reading on disk information.
371 These values might be affected when actually reading on disk information.
372
372
373 The relevant values are returned for use in _loadindex().
373 The relevant values are returned for use in _loadindex().
374
374
375 * newversionflags:
375 * newversionflags:
376 version header to use if we need to create a new revlog
376 version header to use if we need to create a new revlog
377
377
378 * mmapindexthreshold:
378 * mmapindexthreshold:
379 minimal index size for start to use mmap
379 minimal index size for start to use mmap
380
380
381 * force_nodemap:
381 * force_nodemap:
382 force the usage of a "development" version of the nodemap code
382 force the usage of a "development" version of the nodemap code
383 """
383 """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

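    # Illustrative note, not part of revlog.py: the validation above uses
    # the classic bit trick that a positive integer x is a power of two
    # iff x & (x - 1) == 0, since a power of two has exactly one bit set.
    #
    #   assert 65536 & (65536 - 1) == 0   # power of two: accepted
    #   assert 65535 & (65535 - 1) != 0   # not a power of two: rejected
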
    def _get_data(self, filepath, mmap_threshold):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        return util.buffer(util.mmapread(fp))
                return fp.read()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is None:
            entry_point = b'%s.i' % self.radix
        else:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        if self._format_version == REVLOGV0:
            if self._format_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            self._inline = False
            self._generaldelta = False

        elif self._format_version == REVLOGV1:
            if self._format_flags & ~REVLOGV1_FLAGS:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            self._inline = self._format_flags & FLAG_INLINE_DATA
            self._generaldelta = self._format_flags & FLAG_GENERALDELTA

        elif self._format_version == REVLOGV2:
            if self._format_flags & ~REVLOGV2_FLAGS:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            # There is a bug in the transaction handling when going from an
            # inline revlog to a separate index and data file. Turn it off until
            # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
            # See issue6485
            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)

        index_data = entry_data
        self._indexfile = entry_point

        if self.postfix is None or self.postfix == b'a':
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
604 """The public facing "ID" of the revlog that we use in message"""
604 """The public facing "ID" of the revlog that we use in message"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self._indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

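    # Illustrative sketch, not part of revlog.py: internal readers use
    # this context manager roughly as follows (``offset`` and ``length``
    # are hypothetical values that would come from the index):
    #
    #   with self._datareadfp() as fp:
    #       fp.seek(offset)
    #       data = fp.read(length)
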
    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
669 """iterate over all rev in this revlog (from start to stop)"""
669 """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket;
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

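    # Illustrative summary, not part of revlog.py: as the accessors above
    # show, an index entry is a tuple whose fields, as used in this file,
    # are:
    #
    #   entry[0]  offset << 16 | flags (see offset_type/start/flags)
    #   entry[1]  length of the compressed chunk on disk
    #   entry[2]  length of the uncompressed text (negative if unknown)
    #   entry[3]  base revision of the delta chain
    #   entry[4]  linkrev
    #   entry[5]  first parent revision
    #   entry[6]  second parent revision
    #   entry[7]  binary node id
    #   entry[9]  sidedata length (only consulted when hassidedata)
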
    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

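    # Illustrative example, not part of revlog.py: with generaldelta the
    # walk follows entry[3]; otherwise it steps to rev - 1. In a
    # hypothetical revlog where rev 5 deltas against rev 3 and rev 3 is a
    # full snapshot (its base is itself):
    #
    #   chain, stopped = self._deltachain(5)
    #   # chain == [3, 5], stopped == False
    #   chain, stopped = self._deltachain(5, stoprev=3)
    #   # chain == [5], stopped == True
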
915 def ancestors(self, revs, stoprev=0, inclusive=False):
915 def ancestors(self, revs, stoprev=0, inclusive=False):
916 """Generate the ancestors of 'revs' in reverse revision order.
916 """Generate the ancestors of 'revs' in reverse revision order.
917 Does not generate revs lower than stoprev.
917 Does not generate revs lower than stoprev.
918
918
919 See the documentation for ancestor.lazyancestors for more details."""
919 See the documentation for ancestor.lazyancestors for more details."""
920
920
921 # first, make sure start revisions aren't filtered
921 # first, make sure start revisions aren't filtered
922 revs = list(revs)
922 revs = list(revs)
923 checkrev = self.node
923 checkrev = self.node
924 for r in revs:
924 for r in revs:
925 checkrev(r)
925 checkrev(r)
926 # and we're sure ancestors aren't filtered as well
926 # and we're sure ancestors aren't filtered as well
927
927
928 if rustancestor is not None:
928 if rustancestor is not None:
929 lazyancestors = rustancestor.LazyAncestors
929 lazyancestors = rustancestor.LazyAncestors
930 arg = self.index
930 arg = self.index
931 else:
931 else:
932 lazyancestors = ancestor.lazyancestors
932 lazyancestors = ancestor.lazyancestors
933 arg = self._uncheckedparentrevs
933 arg = self._uncheckedparentrevs
934 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
934 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
935
935
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

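    # Editor's sketch of the relationship documented above, in revset
    # notation (`c` and `h` are hypothetical node IDs):
    #
    #     has, missing = rl.findcommonmissing(common=[c], heads=[h])
    #     # has     ~ ::c            (a lazy, set-like object over revs)
    #     # missing ~ (::h) - (::c)  (sorted list of node IDs)
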
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

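    # Editor's sketch: findmissing() is the node-level twin of
    # findmissingrevs(); both funnel through incrementalmissingrevs(), so
    # (assuming hypothetical nodes `c` and `h`) the results should agree:
    #
    #     missing_nodes = rl.findmissing(common=[c], heads=[h])
    #     missing_revs = rl.findmissingrevs(
    #         common=[rl.rev(c)], heads=[rl.rev(h)]
    #     )
    #     # [rl.rev(n) for n in missing_nodes] == missing_revs
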
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

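    # Editor's sketch of nodesbetween() on a hypothetical linear history
    # r0 -> r1 -> r2 -> r3 (n0..n3 being the corresponding node IDs):
    #
    #     nodes, outroots, outheads = rl.nodesbetween([n1], [n3])
    #     # nodes    == [n1, n2, n3]   (topologically sorted, inclusive)
    #     # outroots == [n1]
    #     # outheads == [n3]
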
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

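    # Illustrative calls (editor's sketch; `n` and `m` are hypothetical
    # node IDs):
    #
    #     rl.heads()            # all childless nodes
    #     rl.heads(start=n)     # only heads descending from node n
    #     rl.heads(stop=[m])    # revs from m onward are treated as childless
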
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

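    # Editor's sketch: on a merge where a hypothetical node `m` has parents
    # `a` and `b`, the "best" common ancestor is the closest shared ancestor
    # of the parents, with ties broken deterministically:
    #
    #     gca = rl.ancestor(a, b)
    #     candidates = rl.commonancestorsheads(a, b)  # all tied candidates
    #     # when candidates exist, gca == min(candidates)
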
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

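    # Illustrative behaviour (editor's sketch): shortest() returns the
    # shortest hex prefix of `node` that _partialmatch() resolves
    # unambiguously, never shorter than `minlength` and never a prefix
    # that could be mistaken for the all-'f' wdir id:
    #
    #     prefix = rl.shortest(node, minlength=4)
    #     assert rl._partialmatch(prefix) == node
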
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

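    # Editor's note on the window arithmetic above, with a hypothetical
    # cache size of 65536 (the masking requires a power of two):
    #
    #     cachesize = 65536
    #     offset, length = 70000, 1000
    #     realoffset = offset & ~(cachesize - 1)             # 65536
    #     reallength = (
    #         (offset + length + cachesize) & ~(cachesize - 1)
    #     ) - realoffset                                     # 65536
    #     # i.e. the read is widened to whole cache-aligned blocks that
    #     # cover [realoffset, realoffset + reallength).
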
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is used, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

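    # Illustrative usage (editor's sketch): fetch the decompressed deltas
    # for a whole delta chain with one sliced read instead of one _chunk()
    # call per revision:
    #
    #     chain, _stopped = rl._deltachain(rev)
    #     deltas = rl._chunks(chain)  # one entry per rev, ascending order
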
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the attribute test and
            # lookup
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

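    # Editor's summary of the recursion above (sparse-revlog case): a
    # revision is a snapshot when it is stored as a full text (its base is
    # itself or nullrev), or when its delta base is not one of its parents
    # and that base is itself a snapshot.
    #
    #     rl.issnapshot(rev)  # True for full texts and intermediate
    #                         # snapshots, False for deltas against a parent
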
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

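    # Illustrative calls (editor's sketch): the public text accessors are
    # thin wrappers around _revisiondata():
    #
    #     text = rl.revision(node)  # checked, flag-processed text
    #     raw = rl.rawdata(node)    # text exactly as stored on disk
    #     # revision(node, raw=True) is deprecated in favour of rawdata()
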
1794 def sidedata(self, nodeorrev, _df=None):
1794 def sidedata(self, nodeorrev, _df=None):
1795 """a map of extra data related to the changeset but not part of the hash
1795 """a map of extra data related to the changeset but not part of the hash
1796
1796
1797 This function currently return a dictionary. However, more advanced
1797 This function currently return a dictionary. However, more advanced
1798 mapping object will likely be used in the future for a more
1798 mapping object will likely be used in the future for a more
1799 efficient/lazy code.
1799 efficient/lazy code.
1800 """
1800 """
1801 return self._revisiondata(nodeorrev, _df)[1]
1801 return self._revisiondata(nodeorrev, _df)[1]
1802
1802
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

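    # Illustration (not from the original source): the read path above in
    # miniature. A sketch, assuming ``rl`` is a revlog and ``rev`` exists:
    #
    #   chain, stopped = rl._deltachain(rev)   # base rev plus delta revs
    #   # each chunk in the chain is fetched and decompressed, the deltas are
    #   # folded onto the base with mdiff.patches(basetext, deltas), and flag
    #   # processors then turn the resulting rawtext into the returned text.
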
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

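    # Illustration (not from the original source): in an inline revlog the
    # data segments are interleaved with the fixed-size index entries, so a
    # logical data offset must be shifted past the (rev + 1) index records
    # that precede it. For example, with 64-byte entries and rev == 2:
    #
    #   physical = sidedata_offset + 64 * (2 + 1)
    #
    # (64 bytes is only an example entry size, not a guaranteed constant.)
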
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

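    # Illustration (not from the original source): the default SHA-1 node
    # definition, a sketch assuming 20-byte binary parent nodes:
    #
    #   import hashlib
    #
    #   def sha1node(text, p1, p2):
    #       # parents are hashed in sorted order so (p1, p2) and (p2, p1)
    #       # yield the same node
    #       return hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
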
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        if True:
            with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
                for r in self:
                    dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r

            with self._indexfp(b'w') as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

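    # Illustration (not from the original source): the inline/split decision
    # in miniature. An inline revlog keeps its data inside the .i file until
    # the payload outgrows ``_maxinline``; a sketch of the trigger, assuming
    # ``rl`` is an inline revlog:
    #
    #   total_size = rl.start(tiprev) + rl.length(tiprev)
    #   if rl._inline and total_size >= _maxinline:
    #       ...  # rewrite as separate .i (index) and .d (data) files
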
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

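    # Illustration (not from the original source): a typical write, sketched
    # under the assumption that ``rl`` is a filelog-like revlog, ``tr`` is an
    # open transaction and both parents already exist:
    #
    #   rev = rl.addrevision(b'new contents\n', tr, linkrev, p1, p2)
    #   # re-adding identical (text, p1, p2) hashes to an existing node, so
    #   # the existing revision number is returned instead of a duplicate
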
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

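    # Illustration (not from the original source): the (header, payload)
    # pairs produced above, sketched for the default zlib engine (the exact
    # payloads are illustrative, not guaranteed):
    #
    #   compress(b'')          -> (b'', b'')          # empty chunk
    #   compress(b'x' * 1000)  -> (b'', b'x\x9c...')  # engine embeds 'x' header
    #   compress(b'tiny')      -> (b'u', b'tiny')     # stored uncompressed
    #   compress(b'\0abc')     -> (b'', b'\0abc')     # leading NUL needs no marker
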
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

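    # Illustration (not from the original source): the meaning of the index
    # tuple ``e`` appended above, position by position:
    #
    #   (offset_type(offset, flags),  # data offset packed with flag bits
    #    deltainfo.deltalen,          # size of the stored (compressed) chunk
    #    textlen,                     # size of the full revision text
    #    deltainfo.base,              # rev the delta applies against
    #    link, p1r, p2r, node,        # linkrev, parent revs, node id
    #    sidedata_offset,             # 0 when there is no sidedata
    #    len(serialized_sidedata))
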
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

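    # Illustration (not from the original source): ``data`` above is the
    # (header, payload) pair returned by ``compress()``, so the two writes
    # emit e.g. b'u' followed by the raw bytes for an uncompressed chunk, or
    # just the compressed bytes when the engine embedded its own header.
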
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self._indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self._indexfile, isize)
            transaction.add(self._datafile, end)
            dfh = self._datafp(b"a+")

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            if True:
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        ifh,
                        dfh,
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False

                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        ifh.close()
                        dfh = self._datafp(b"a+")
                        ifh = self._indexfp(b"a+")
                        self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

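    # Illustration (not from the original source): each element pulled from
    # ``deltas`` above is an 8-tuple; a sketch of one entry, assuming ``base``
    # is the node the delta applies against:
    #
    #   (node, p1, p2, linknode, base, delta, flags, sidedata)
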
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

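    # Illustration (not from the original source): the truncation offsets used
    # above. For a split revlog the index is cut at ``rev * entry_size`` and
    # the data file at ``start(rev)``; for an inline revlog both live in the
    # .i file, so the single cut point is ``start(rev) + rev * entry_size``.
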
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

2705 def clone(
2706 def clone(
2706 self,
2707 self,
2707 tr,
2708 tr,
2708 destrevlog,
2709 destrevlog,
2709 addrevisioncb=None,
2710 addrevisioncb=None,
2710 deltareuse=DELTAREUSESAMEREVS,
2711 deltareuse=DELTAREUSESAMEREVS,
2711 forcedeltabothparents=None,
2712 forcedeltabothparents=None,
2712 sidedata_helpers=None,
2713 sidedata_helpers=None,
2713 ):
2714 ):
2714 """Copy this revlog to another, possibly with format changes.
2715 """Copy this revlog to another, possibly with format changes.
2715
2716
2716 The destination revlog will contain the same revisions and nodes.
2717 The destination revlog will contain the same revisions and nodes.
2717 However, it may not be bit-for-bit identical due to e.g. delta encoding
2718 However, it may not be bit-for-bit identical due to e.g. delta encoding
2718 differences.
2719 differences.
2719
2720
2720 The ``deltareuse`` argument control how deltas from the existing revlog
2721 The ``deltareuse`` argument control how deltas from the existing revlog
2721 are preserved in the destination revlog. The argument can have the
2722 are preserved in the destination revlog. The argument can have the
2722 following values:
2723 following values:
2723
2724
2724 DELTAREUSEALWAYS
2725 DELTAREUSEALWAYS
2725 Deltas will always be reused (if possible), even if the destination
2726 Deltas will always be reused (if possible), even if the destination
2726 revlog would not select the same revisions for the delta. This is the
2727 revlog would not select the same revisions for the delta. This is the
2727 fastest mode of operation.
2728 fastest mode of operation.
2728 DELTAREUSESAMEREVS
2729 DELTAREUSESAMEREVS
2729 Deltas will be reused if the destination revlog would pick the same
2730 Deltas will be reused if the destination revlog would pick the same
2730 revisions for the delta. This mode strikes a balance between speed
2731 revisions for the delta. This mode strikes a balance between speed
2731 and optimization.
2732 and optimization.
2732 DELTAREUSENEVER
2733 DELTAREUSENEVER
2733 Deltas will never be reused. This is the slowest mode of execution.
2734 Deltas will never be reused. This is the slowest mode of execution.
2734 This mode can be used to recompute deltas (e.g. if the diff/delta
2735 This mode can be used to recompute deltas (e.g. if the diff/delta
2735 algorithm changes).
2736 algorithm changes).
2736 DELTAREUSEFULLADD
2737 DELTAREUSEFULLADD
2737 Revision will be re-added as if their were new content. This is
2738 Revision will be re-added as if their were new content. This is
2738 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2739 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2739 eg: large file detection and handling.
2740 eg: large file detection and handling.
2740
2741
2741 Delta computation can be slow, so the choice of delta reuse policy can
2742 Delta computation can be slow, so the choice of delta reuse policy can
2742 significantly affect run time.
2743 significantly affect run time.
2743
2744
2744 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2745 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2745 two extremes. Deltas will be reused if they are appropriate. But if the
2746 two extremes. Deltas will be reused if they are appropriate. But if the
2746 delta could choose a better revision, it will do so. This means if you
2747 delta could choose a better revision, it will do so. This means if you
2747 are converting a non-generaldelta revlog to a generaldelta revlog,
2748 are converting a non-generaldelta revlog to a generaldelta revlog,
2748 deltas will be recomputed if the delta's parent isn't a parent of the
2749 deltas will be recomputed if the delta's parent isn't a parent of the
2749 revision.
2750 revision.
2750
2751
2751 In addition to the delta policy, the ``forcedeltabothparents``
2752 In addition to the delta policy, the ``forcedeltabothparents``
2752 argument controls whether to force compute deltas against both parents
2753 argument controls whether to force compute deltas against both parents
2753 for merges. By default, the current default is used.
2754 for merges. By default, the current default is used.
2754
2755
2755 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2756 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2756 `sidedata_helpers`.
2757 `sidedata_helpers`.
2757 """
2758 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

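            # Summary of the mapping above; DELTAREUSEFULLADD leaves these
            # attributes untouched and is instead handled entirely inside
            # _clone():
            #
            #   policy              _lazydelta  _lazydeltabase
            #   DELTAREUSEALWAYS    True        True
            #   DELTAREUSESAMEREVS  True        False
            #   DELTAREUSENEVER     False       False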
            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
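            # Index entries are tuples; the fields used here: entry[0] packs
            # the data offset with the 16 low flag bits, entry[4] is the
            # linkrev, entry[5]/entry[6] are the parent revisions (mapped to
            # nodes through the index), and entry[7] is the node id.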

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))
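                # A cached delta is the pair (delta parent revision, raw
                # delta bytes). Passing it along lets the destination reuse
                # the existing diff instead of recomputing it, subject to its
                # own lazydelta/lazydeltabase settings.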

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                ifh = destrevlog.opener(
                    destrevlog._indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog._datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
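        # The temporary revlog is written next to the original under the
        # b'tmpcensored' postfix and inherits the source's format settings,
        # so revisions copy over without a format conversion.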
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            # -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}
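        # Each boolean argument requests the matching key in the returned
        # dict, so callers only pay for the statistics they ask for (e.g.
        # ``storedsize`` stats every file backing this revlog).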

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only
        # operation). See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that assumes
        # all index data is written in append mode, which is incompatible
        # with the seeked writes done in this method. Such "delayed" writing
        # will soon be removed for revlog versions that support side data, so
        # for now we only keep this simple assert to highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

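        # The rewrite proceeds in two phases: first append the freshly
        # generated sidedata blobs to the data file, then seek back into the
        # index and rewrite the affected entries to point at them.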
        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'r+') as fp:
            fp.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
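                # The revlog header occupies the first bytes of the index
                # file, overlapping the start of the first entry, so when
                # rewriting rev 0 the packed header (format flags | version)
                # must be re-emitted in front of the entry.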
                if rev == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                fp.write(packed)