revlog: directly use self._format_flags when loading index...
marmoute
r47945:58ef549a default
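The change below is a small refactor inside `_loadindex()`: the temporary local alias `flags` is dropped in favor of reading `self._format_flags` directly, and the shifted value used in error messages gets its own `display_flag` name. A minimal, self-contained sketch of that pattern follows (hypothetical `RevlogSketch` class and simplified constants; only `_format_flags`, `_format_version`, and the header arithmetic mirror the diff below):

    # Minimal sketch of the header-parsing pattern this commit touches.
    # The first 4 bytes of a revlog index pack the version (low 16 bits)
    # and feature flags (high 16 bits). "RevlogSketch" is a hypothetical
    # stand-in for the real revlog class.
    import struct

    REVLOGV1 = 1
    FLAG_INLINE_DATA = 1 << 16
    FLAG_GENERALDELTA = 1 << 17
    REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA


    class RevlogSketch(object):
        def load_header(self, header_bytes):
            (header,) = struct.unpack('>I', header_bytes)
            # Before this commit, a short-lived local alias was kept:
            #   flags = self._format_flags = header & ~0xFFFF
            # After, the attribute is read directly, and the shifted
            # value gets its own name before building the message:
            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF
            if self._format_flags & ~REVLOGV1_FLAGS:
                display_flag = self._format_flags >> 16
                raise ValueError(
                    'unknown flags (%#04x) in version %d revlog'
                    % (display_flag, self._format_version)
                )
            return self._format_version


    # Usage: a v1 header with the inline-data flag set parses cleanly.
    rl = RevlogSketch()
    header = struct.pack('>I', FLAG_INLINE_DATA | REVLOGV1)
    assert rl.load_header(header) == REVLOGV1

The behavior is unchanged; the refactor only removes the duplicate name so the header flags have a single source of truth.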
@@ -1,3177 +1,3180 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import absolute_import
 
 import binascii
 import collections
 import contextlib
 import errno
 import io
 import os
 import struct
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullrev,
     sha1nodeconstants,
     short,
     wdirrev,
 )
 from .i18n import _
 from .pycompat import getattr
 from .revlogutils.constants import (
     ALL_KINDS,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import attr
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     flagutil,
     nodemap as nodemaputil,
     revlogv0,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )
 
 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.
 
 REVLOGV0
 REVLOGV1
 REVLOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS
 
 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 rustrevlog = policy.importrust('revlog')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False
 
 
 def ellipsiswriteprocessor(rl, text):
     return text, False
 
 
 def ellipsisrawprocessor(rl, text):
     return False
 
 
 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )
 
 
 def offset_type(offset, type):
     if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError(b'unknown revlog index flags')
     return int(int(offset) << 16 | type)
 
 
 def _verify_revision(rl, skipflags, state, node):
     """Verify the integrity of the given revlog ``node`` while providing a hook
     point for extensions to influence the operation."""
     if skipflags:
         state[b'skipread'].add(node)
     else:
         # Side-effect: read content and verify hash.
         rl.revision(node)
 
 
 # True if a fast implementation for persistent-nodemap is available
 #
 # We also consider we have a "fast" implementation in "pure" python because
 # people using pure don't really have performance consideration (and a
 # wheelbarrow of other slowness source)
 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
     parsers, 'BaseIndexObject'
 )
 
 
 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node: expected hash of the revision
     p1, p2: parent revs of the revision
     btext: built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags: flags associated to the revision storage
 
     One of btext[0] or cachedelta must be set.
     """
 
     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()
 
 
 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta(object):
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
     protocol_flags = attr.ib()
     linknode = attr.ib(default=None)
 
 
 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)
 
 
 def parse_index_v1(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline)
     return index, cache
 
 
 def parse_index_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, revlogv2=True)
     return index, cache
 
 
 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
 
     def parse_index_v1_nodemap(data, inline):
         index, cache = parsers.parse_index_devel_nodemap(data, inline)
         return index, cache
 
 
 else:
     parse_index_v1_nodemap = None
 
 
 def parse_index_v1_mixed(data, inline):
     index, cache = parse_index_v1(data, inline)
     return rustrevlog.MixedIndex(index), cache
 
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF
 
 
 class revlog(object):
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
 
     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.
 
     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
 
     If censorable is True, the revlog can have censored revisions.
 
     If `upperboundcomp` is not None, this is the expected maximal gain from
     compression for the data content.
 
     `concurrencychecker` is an optional function that receives 3 arguments: a
     file handle, a filename, and an expected position. It should check whether
     the current position in the file handle is valid, and log/warn/fail (by
     raising).
     """
 
     _flagserrorclass = error.RevlogError
 
     def __init__(
         self,
         opener,
         target,
         radix,
         postfix=None,
         checkambig=False,
         mmaplargeindex=False,
         censorable=False,
         upperboundcomp=None,
         persistentnodemap=False,
         concurrencychecker=None,
     ):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
 
         `target`: a (KIND, ID) tuple that identify the content stored in
         this revlog. It help the rest of the code to understand what the revlog
         is about without having to resort to heuristic and index filename
         analysis. Note: that this must be reliably be set by normal code, but
         that test, debug, or performance measurement code might not set this to
         accurate value.
         """
         self.upperboundcomp = upperboundcomp
 
         self.radix = radix
 
         self._indexfile = None
         self._datafile = None
         self._nodemap_file = None
         self.postfix = postfix
         self.opener = opener
         if persistentnodemap:
             self._nodemap_file = nodemaputil.get_nodemap_file(self)
 
         assert target[0] in ALL_KINDS
         assert len(target) == 2
         self.target = target
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         self._mmaplargeindex = mmaplargeindex
         self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._revisioncache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, b'')
         # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._deltabothparents = True
         self.index = None
         self._nodemap_docket = None
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
         self._compengine = b'zlib'
         self._compengineopts = {}
         self._maxdeltachainspan = -1
         self._withsparseread = False
         self._sparserevlog = False
         self._srdensitythreshold = 0.50
         self._srmingapsize = 262144
 
         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)
 
         # 2-tuple of file handles being used for active writing.
         self._writinghandles = None
 
         self._loadindex()
 
         self._concurrencychecker = concurrencychecker
 
     def _init_opts(self):
         """process options (from above/config) to setup associated default revlog mode
 
         These values might be affected when actually reading on disk information.
 
         The relevant values are returned for use in _loadindex().
 
         * newversionflags:
             version header to use if we need to create a new revlog
 
         * mmapindexthreshold:
             minimal index size for start to use mmap
 
         * force_nodemap:
             force the usage of a "development" version of the nodemap code
         """
         mmapindexthreshold = None
         opts = self.opener.options
 
         if b'revlogv2' in opts:
             new_header = REVLOGV2 | FLAG_INLINE_DATA
         elif b'revlogv1' in opts:
             new_header = REVLOGV1 | FLAG_INLINE_DATA
             if b'generaldelta' in opts:
                 new_header |= FLAG_GENERALDELTA
         elif b'revlogv0' in self.opener.options:
             new_header = REVLOGV0
         else:
             new_header = REVLOG_DEFAULT_VERSION
 
         if b'chunkcachesize' in opts:
             self._chunkcachesize = opts[b'chunkcachesize']
         if b'maxchainlen' in opts:
             self._maxchainlen = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
             self._deltabothparents = opts[b'deltabothparents']
         self._lazydelta = bool(opts.get(b'lazydelta', True))
         self._lazydeltabase = False
         if self._lazydelta:
             self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
         if b'compengine' in opts:
             self._compengine = opts[b'compengine']
         if b'zlib.level' in opts:
             self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
         if b'zstd.level' in opts:
             self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
         if b'maxdeltachainspan' in opts:
             self._maxdeltachainspan = opts[b'maxdeltachainspan']
         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
             mmapindexthreshold = opts[b'mmapindexthreshold']
         self.hassidedata = bool(opts.get(b'side-data', False))
         self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
         withsparseread = bool(opts.get(b'with-sparse-read', False))
         # sparse-revlog forces sparse-read
         self._withsparseread = self._sparserevlog or withsparseread
         if b'sparse-read-density-threshold' in opts:
             self._srdensitythreshold = opts[b'sparse-read-density-threshold']
         if b'sparse-read-min-gap-size' in opts:
             self._srmingapsize = opts[b'sparse-read-min-gap-size']
         if opts.get(b'enableellipsis'):
             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
 
         # revlog v0 doesn't have flag processors
         for flag, processor in pycompat.iteritems(
             opts.get(b'flagprocessors', {})
         ):
             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
 
         if self._chunkcachesize <= 0:
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not greater than 0')
                 % self._chunkcachesize
             )
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not a power of 2')
                 % self._chunkcachesize
             )
         force_nodemap = opts.get(b'devel-force-nodemap', False)
         return new_header, mmapindexthreshold, force_nodemap
 
     def _get_data(self, filepath, mmap_threshold):
         """return a file content with or without mmap
 
         If the file is missing return the empty string"""
         try:
             with self.opener(filepath) as fp:
                 if mmap_threshold is not None:
                     file_size = self.opener.fstat(fp).st_size
                     if file_size >= mmap_threshold:
                         # TODO: should .close() to release resources without
                         # relying on Python GC
                         return util.buffer(util.mmapread(fp))
                 return fp.read()
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
             return b''
 
     def _loadindex(self):
 
         new_header, mmapindexthreshold, force_nodemap = self._init_opts()
 
         if self.postfix is None:
             index_file = b'%s.i' % self.radix
             data_file = b'%s.d' % self.radix
         elif self.postfix == b'a':
             index_file = b'%s.i.a' % self.radix
             data_file = b'%s.d' % self.radix
         else:
             index_file = b'%s.i.%s' % (self.radix, self.postfix)
             data_file = b'%s.d.%s' % (self.radix, self.postfix)
 
         self._indexfile = index_file
         self._datafile = data_file
 
         indexdata = b''
         self._initempty = True
         indexdata = self._get_data(self._indexfile, mmapindexthreshold)
         if len(indexdata) > 0:
             header = INDEX_HEADER.unpack(indexdata[:4])[0]
             self._initempty = False
         else:
             header = new_header
 
-        flags = self._format_flags = header & ~0xFFFF
+        self._format_flags = header & ~0xFFFF
         self._format_version = header & 0xFFFF
 
         if self._format_version == REVLOGV0:
-            if flags:
+            if self._format_flags:
                 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
-                msg %= (flags >> 16, self._format_version, self.display_id)
+                display_flag = self._format_flags >> 16
+                msg %= (display_flag, self._format_version, self.display_id)
                 raise error.RevlogError(msg)
 
             self._inline = False
             self._generaldelta = False
 
         elif self._format_version == REVLOGV1:
-            if flags & ~REVLOGV1_FLAGS:
+            if self._format_flags & ~REVLOGV1_FLAGS:
                 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
-                msg %= (flags >> 16, self._format_version, self.display_id)
+                display_flag = self._format_flags >> 16
+                msg %= (display_flag, self._format_version, self.display_id)
                 raise error.RevlogError(msg)
 
             self._inline = self._format_flags & FLAG_INLINE_DATA
             self._generaldelta = self._format_flags & FLAG_GENERALDELTA
 
         elif self._format_version == REVLOGV2:
-            if flags & ~REVLOGV2_FLAGS:
+            if self._format_flags & ~REVLOGV2_FLAGS:
                 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
-                msg %= (flags >> 16, self._format_version, self.display_id)
+                display_flag = self._format_flags >> 16
+                msg %= (display_flag, self._format_version, self.display_id)
                 raise error.RevlogError(msg)
 
             # There is a bug in the transaction handling when going from an
             # inline revlog to a separate index and data file. Turn it off until
             # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
             # See issue6485
             self._inline = False
             # generaldelta implied by version 2 revlogs.
             self._generaldelta = True
 
         else:
             msg = _(b'unknown version (%d) in revlog %s')
             msg %= (self._format_version, self.display_id)
             raise error.RevlogError(msg)
 
         self.nodeconstants = sha1nodeconstants
         self.nullid = self.nodeconstants.nullid
 
         # sparse-revlog can't be on without general-delta (issue6056)
         if not self._generaldelta:
             self._sparserevlog = False
 
         self._storedeltachains = True
 
         devel_nodemap = (
             self._nodemap_file
             and force_nodemap
             and parse_index_v1_nodemap is not None
         )
 
         use_rust_index = False
         if rustrevlog is not None:
             if self._nodemap_file is not None:
                 use_rust_index = True
             else:
                 use_rust_index = self.opener.options.get(b'rust.index')
 
         self._parse_index = parse_index_v1
         if self._format_version == REVLOGV0:
             self._parse_index = revlogv0.parse_index_v0
         elif self._format_version == REVLOGV2:
             self._parse_index = parse_index_v2
         elif devel_nodemap:
             self._parse_index = parse_index_v1_nodemap
         elif use_rust_index:
             self._parse_index = parse_index_v1_mixed
         try:
             d = self._parse_index(indexdata, self._inline)
             index, _chunkcache = d
             use_nodemap = (
                 not self._inline
                 and self._nodemap_file is not None
                 and util.safehasattr(index, 'update_nodemap_data')
             )
             if use_nodemap:
                 nodemap_data = nodemaputil.persisted_data(self)
                 if nodemap_data is not None:
                     docket = nodemap_data[0]
                     if (
                         len(d[0]) > docket.tip_rev
                         and d[0][docket.tip_rev][7] == docket.tip_node
                     ):
                         # no changelog tampering
                         self._nodemap_docket = docket
                         index.update_nodemap_data(*nodemap_data)
         except (ValueError, IndexError):
             raise error.RevlogError(
                 _(b"index %s is corrupted") % self.display_id
             )
         self.index, self._chunkcache = d
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = util.lrucachedict(500)
         # revlog header -> revlog compressor
         self._decompressors = {}
 
     @util.propertycache
     def revlog_kind(self):
         return self.target[0]
 
     @util.propertycache
     def display_id(self):
         """The public facing "ID" of the revlog that we use in message"""
         # Maybe we should build a user facing representation of
         # revlog.target instead of using `self.radix`
         return self.radix
 
     @util.propertycache
     def _compressor(self):
         engine = util.compengines[self._compengine]
         return engine.revlogcompressor(self._compengineopts)
 
     def _indexfp(self, mode=b'r'):
         """file object for the revlog's index file"""
         args = {'mode': mode}
         if mode != b'r':
             args['checkambig'] = self._checkambig
         if mode == b'w':
             args['atomictemp'] = True
         return self.opener(self._indexfile, **args)
 
     def _datafp(self, mode=b'r'):
         """file object for the revlog's data file"""
         return self.opener(self._datafile, mode=mode)
 
     @contextlib.contextmanager
     def _datareadfp(self, existingfp=None):
         """file object suitable to read data"""
         # Use explicit file handle, if given.
         if existingfp is not None:
             yield existingfp
 
         # Use a file handle being actively used for writes, if available.
         # There is some danger to doing this because reads will seek the
         # file. However, _writeentry() performs a SEEK_END before all writes,
         # so we should be safe.
         elif self._writinghandles:
             if self._inline:
                 yield self._writinghandles[0]
             else:
                 yield self._writinghandles[1]
 
         # Otherwise open a new file handle.
         else:
             if self._inline:
                 func = self._indexfp
             else:
                 func = self._datafp
             with func() as fp:
                 yield fp
 
     def tiprev(self):
         return len(self.index) - 1
 
     def tip(self):
         return self.node(self.tiprev())
 
     def __contains__(self, rev):
         return 0 <= rev < len(self)
 
     def __len__(self):
         return len(self.index)
 
     def __iter__(self):
         return iter(pycompat.xrange(len(self)))
 
     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         return storageutil.iterrevs(len(self), start=start, stop=stop)
 
     @property
     def nodemap(self):
         msg = (
             b"revlog.nodemap is deprecated, "
             b"use revlog.index.[has_node|rev|get_rev]"
         )
         util.nouideprecwarn(msg, b'5.3', stacklevel=2)
         return self.index.nodemap
 
     @property
     def _nodecache(self):
         msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
         util.nouideprecwarn(msg, b'5.3', stacklevel=2)
         return self.index.nodemap
 
     def hasnode(self, node):
         try:
             self.rev(node)
             return True
         except KeyError:
             return False
 
     def candelta(self, baserev, rev):
         """whether two revisions (baserev, rev) can be delta-ed or not"""
         # Disable delta if either rev requires a content-changing flag
         # processor (ex. LFS). This is because such flag processor can alter
         # the rawtext content that the delta will be based on, and two clients
         # could have a same revlog node with different flags (i.e. different
         # rawtext contents) and the delta could be incompatible.
         if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
             self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
         ):
             return False
         return True
 
     def update_caches(self, transaction):
         if self._nodemap_file is not None:
             if transaction is None:
                 nodemaputil.update_persistent_nodemap(self)
             else:
                 nodemaputil.setup_persistent_nodemap(transaction, self)
 
     def clearcaches(self):
         self._revisioncache = None
         self._chainbasecache.clear()
         self._chunkcache = (0, b'')
         self._pcache = {}
         self._nodemap_docket = None
         self.index.clearcaches()
         # The python code is the one responsible for validating the docket, we
         # end up having to refresh it here.
         use_nodemap = (
             not self._inline
             and self._nodemap_file is not None
             and util.safehasattr(self.index, 'update_nodemap_data')
         )
         if use_nodemap:
             nodemap_data = nodemaputil.persisted_data(self)
             if nodemap_data is not None:
                 self._nodemap_docket = nodemap_data[0]
                 self.index.update_nodemap_data(*nodemap_data)
 
     def rev(self, node):
         try:
             return self.index.rev(node)
         except TypeError:
             raise
         except error.RevlogError:
             # parsers.c radix tree lookup failed
             if (
                 node == self.nodeconstants.wdirid
                 or node in self.nodeconstants.wdirfilenodeids
             ):
                 raise error.WdirUnsupported
             raise error.LookupError(node, self.display_id, _(b'no node'))
 
     # Accessors for index entries.
 
     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
     # are flags.
     def start(self, rev):
         return int(self.index[rev][0] >> 16)
 
     def flags(self, rev):
         return self.index[rev][0] & 0xFFFF
 
     def length(self, rev):
         return self.index[rev][1]
 
     def sidedata_length(self, rev):
         if not self.hassidedata:
             return 0
         return self.index[rev][9]
 
     def rawsize(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l
 
         t = self.rawdata(rev)
         return len(t)
 
     def size(self, rev):
         """length of non-raw text (processed by a "read" flag processor)"""
         # fast path: if no "read" flag processor could change the content,
         # size is rawsize. note: ELLIPSIS is known to not change the content.
         flags = self.flags(rev)
         if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
             return self.rawsize(rev)
 
         return len(self.revision(rev, raw=False))
 
     def chainbase(self, rev):
         base = self._chainbasecache.get(rev)
         if base is not None:
             return base
 
         index = self.index
         iterrev = rev
         base = index[iterrev][3]
         while base != iterrev:
             iterrev = base
             base = index[iterrev][3]
 
         self._chainbasecache[rev] = base
         return base
 
     def linkrev(self, rev):
         return self.index[rev][4]
 
     def parentrevs(self, rev):
         try:
             entry = self.index[rev]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise
         if entry[5] == nullrev:
             return entry[6], entry[5]
         else:
             return entry[5], entry[6]
 
     # fast parentrevs(rev) where rev isn't filtered
     _uncheckedparentrevs = parentrevs
814
817
815 def node(self, rev):
818 def node(self, rev):
816 try:
819 try:
817 return self.index[rev][7]
820 return self.index[rev][7]
818 except IndexError:
821 except IndexError:
819 if rev == wdirrev:
822 if rev == wdirrev:
820 raise error.WdirUnsupported
823 raise error.WdirUnsupported
821 raise
824 raise
822
825
823 # Derived from index values.
826 # Derived from index values.
824
827
825 def end(self, rev):
828 def end(self, rev):
826 return self.start(rev) + self.length(rev)
829 return self.start(rev) + self.length(rev)
827
830
828 def parents(self, node):
831 def parents(self, node):
829 i = self.index
832 i = self.index
830 d = i[self.rev(node)]
833 d = i[self.rev(node)]
831 # inline node() to avoid function call overhead
834 # inline node() to avoid function call overhead
832 if d[5] == self.nullid:
835 if d[5] == self.nullid:
833 return i[d[6]][7], i[d[5]][7]
836 return i[d[6]][7], i[d[5]][7]
834 else:
837 else:
835 return i[d[5]][7], i[d[6]][7]
838 return i[d[5]][7], i[d[6]][7]
836
839
837 def chainlen(self, rev):
840 def chainlen(self, rev):
838 return self._chaininfo(rev)[0]
841 return self._chaininfo(rev)[0]
839
842
840 def _chaininfo(self, rev):
843 def _chaininfo(self, rev):
841 chaininfocache = self._chaininfocache
844 chaininfocache = self._chaininfocache
842 if rev in chaininfocache:
845 if rev in chaininfocache:
843 return chaininfocache[rev]
846 return chaininfocache[rev]
844 index = self.index
847 index = self.index
845 generaldelta = self._generaldelta
848 generaldelta = self._generaldelta
846 iterrev = rev
849 iterrev = rev
847 e = index[iterrev]
850 e = index[iterrev]
848 clen = 0
851 clen = 0
849 compresseddeltalen = 0
852 compresseddeltalen = 0
850 while iterrev != e[3]:
853 while iterrev != e[3]:
851 clen += 1
854 clen += 1
852 compresseddeltalen += e[1]
855 compresseddeltalen += e[1]
853 if generaldelta:
856 if generaldelta:
854 iterrev = e[3]
857 iterrev = e[3]
855 else:
858 else:
856 iterrev -= 1
859 iterrev -= 1
857 if iterrev in chaininfocache:
860 if iterrev in chaininfocache:
858 t = chaininfocache[iterrev]
861 t = chaininfocache[iterrev]
859 clen += t[0]
862 clen += t[0]
860 compresseddeltalen += t[1]
863 compresseddeltalen += t[1]
861 break
864 break
862 e = index[iterrev]
865 e = index[iterrev]
863 else:
866 else:
864 # Add text length of base since decompressing that also takes
867 # Add text length of base since decompressing that also takes
865 # work. For cache hits the length is already included.
868 # work. For cache hits the length is already included.
866 compresseddeltalen += e[1]
869 compresseddeltalen += e[1]
867 r = (clen, compresseddeltalen)
870 r = (clen, compresseddeltalen)
868 chaininfocache[rev] = r
871 chaininfocache[rev] = r
869 return r
872 return r
870
873
871 def _deltachain(self, rev, stoprev=None):
874 def _deltachain(self, rev, stoprev=None):
872 """Obtain the delta chain for a revision.
875 """Obtain the delta chain for a revision.
873
876
874 ``stoprev`` specifies a revision to stop at. If not specified, we
877 ``stoprev`` specifies a revision to stop at. If not specified, we
875 stop at the base of the chain.
878 stop at the base of the chain.
876
879
877 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
880 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
878 revs in ascending order and ``stopped`` is a bool indicating whether
881 revs in ascending order and ``stopped`` is a bool indicating whether
879 ``stoprev`` was hit.
882 ``stoprev`` was hit.
880 """
883 """
881 # Try C implementation.
884 # Try C implementation.
882 try:
885 try:
883 return self.index.deltachain(rev, stoprev, self._generaldelta)
886 return self.index.deltachain(rev, stoprev, self._generaldelta)
884 except AttributeError:
887 except AttributeError:
885 pass
888 pass
886
889
887 chain = []
890 chain = []
888
891
889 # Alias to prevent attribute lookup in tight loop.
892 # Alias to prevent attribute lookup in tight loop.
890 index = self.index
893 index = self.index
891 generaldelta = self._generaldelta
894 generaldelta = self._generaldelta
892
895
893 iterrev = rev
896 iterrev = rev
894 e = index[iterrev]
897 e = index[iterrev]
895 while iterrev != e[3] and iterrev != stoprev:
898 while iterrev != e[3] and iterrev != stoprev:
896 chain.append(iterrev)
899 chain.append(iterrev)
897 if generaldelta:
900 if generaldelta:
898 iterrev = e[3]
901 iterrev = e[3]
899 else:
902 else:
900 iterrev -= 1
903 iterrev -= 1
901 e = index[iterrev]
904 e = index[iterrev]
902
905
903 if iterrev == stoprev:
906 if iterrev == stoprev:
904 stopped = True
907 stopped = True
905 else:
908 else:
906 chain.append(iterrev)
909 chain.append(iterrev)
907 stopped = False
910 stopped = False
908
911
909 chain.reverse()
912 chain.reverse()
910 return chain, stopped
913 return chain, stopped
911
914
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

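    # Hedged example of the contract above (revision numbers hypothetical):
    # in a linear history 0..4, with common=[node(2)] and heads=[node(4)],
    # `has` lazily contains nullrev plus revs 0..2, and the second element
    # of the returned tuple is [node(3), node(4)], sorted by revision
    # number and therefore topologically.
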
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

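    # Worked example for the pure-Python fallback above (hypothetical DAG):
    # with parent links 0 <- 1 <- 2 and 0 <- 3, each rev is first marked as
    # a candidate head, then clearing every rev's parents unmarks 0 and 1,
    # leaving [2, 3]. The extra slot in `ishead` absorbs writes indexed by
    # nullrev (-1) without an IndexError.
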
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

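    # Sketch of the identifier forms `_match` resolves (values are
    # hypothetical):
    #
    #     rl._match(5)            # an int is treated as a revision number
    #     rl._match(b'5')         # str(rev), including negative offsets
    #     rl._match(binary_node)  # a nodelen-byte binary node id
    #     rl._match(hex_node)     # a full-length hex node id
    #
    # Anything else (e.g. a short hex prefix) falls through and returns None.
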
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

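    # Note on the wdir handling above: a prefix consisting only of b'f'
    # characters could also abbreviate the virtual working-directory id
    # (ffff...), so such prefixes are extended until the ambiguity goes
    # away; e.g. b'f3' can be a valid result where b'ff' may not be.
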
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

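    # Sketch of the append behaviour above (sizes hypothetical): if the
    # cache holds (0, <64KiB of data>) and a segment starting at offset
    # 65536 arrives, the two are concatenated into one contiguous window
    # instead of replacing the cache, as long as the combined size stays
    # under _chunksize. Sequential reads thus keep growing a single window.
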
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, a length to read, and an optional
        existing file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
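        # Worked example of the alignment above (hypothetical numbers, with
        # a 64KiB cache window; the masking trick requires the window size
        # to be a power of two): offset=70000 and length=1000 yield
        # realoffset = 70000 & ~65535 = 65536 and
        # reallength = (136536 & ~65535) - 65536 = 65536, i.e. one aligned
        # window covering [65536, 131072), which contains the request.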
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, a length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

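    # Worked example of the inline adjustment above (hypothetical sizes):
    # in an inline revlog with 64-byte index entries, the data for rev r is
    # preceded in the same file by r + 1 index entries (its own included)
    # plus the data of all earlier revs, so its physical offset is
    # start(r) + (r + 1) * 64.
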
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

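    # Consequence of the branches above: with generaldelta the delta parent
    # is whatever revision the index entry records; without it the delta
    # parent is implicitly rev - 1; and base == rev marks a full snapshot,
    # reported as nullrev.
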
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

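    # Illustrative chain for the recursion above (revision numbers are
    # hypothetical): if rev 9 deltas against rev 4, which deltas against
    # nullrev, then rev 4 is a level-0 snapshot and rev 9 is an
    # intermediate snapshot, provided rev 4 is not one of rev 9's parents
    # (a delta against a parent is an ordinary delta, not a snapshot).
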
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't need to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

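    # Illustrative sketch, not part of the original file: the two public
    # entry points built on _revisiondata() differ only in whether flag
    # processors run on the way out. Assuming `rl` is an open revlog and
    # `node` has no extra flags set:
    #
    #   text = rl.revision(node)  # processed text, hash-checked
    #   raw = rl.rawdata(node)    # exact bytes stored in the revlog
    #   assert text == raw        # holds only because no flags are set
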
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

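    # Descriptive note (inferred from the index accesses above, not original
    # text): in this index layout entry[8] is the offset of a revision's
    # sidedata blob and entry[9] its size in bytes; a size of zero encodes
    # "no sidedata", which is why _sidedata() short-circuits to {}.
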
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

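    # A minimal sketch of the default node hash, assuming the behavior of
    # storageutil.hashrevisionsha1 (the sketch itself is not part of the
    # original file): the parent nodes are sorted first, so the hash is
    # symmetric in p1/p2.
    #
    #   import hashlib
    #
    #   def hashrevisionsha1_sketch(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
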
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self._indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

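    # Descriptive note on the conversion above (not original text): an
    # inline revlog interleaves each data chunk with its index entry in the
    # .i file. Once the total size outgrows _maxinline, every data segment
    # is copied into a new .d file, FLAG_INLINE_DATA is cleared from the
    # header of a rewritten index, and tr.replace() records that a rollback
    # should truncate the index at `trindex * entry_size`.
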
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

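    # Illustrative sketch, assuming `rl` is an open revlog (not part of the
    # original file): the returned 2-tuple is (header, data) and the stored
    # chunk is their concatenation, so decompress() can route on the first
    # byte:
    #
    #   header, payload = rl.compress(b'some revision text')
    #   chunk = header + payload
    #   assert bytes(rl.decompress(chunk)) == b'some revision text'
    #
    # The header is b'u' for data kept uncompressed, and b'' either when the
    # compressor produced output (its own header then leads the payload) or
    # when the data already starts with b'\0'.
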
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

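    # Descriptive note (partly an assumption, not original text): the
    # one-byte headers handled above are b'x' (zlib, the historical
    # default), b'\0' (stored as-is because the raw data begins with a
    # NUL), and b'u' (uncompressed with a marker byte to strip); any other
    # byte is resolved through the pluggable compression-engine registry,
    # e.g. for zstd-compressed revlogs.
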
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way we can
            # easily detect empty sidedata and it will be no different than
            # the ones we add manually.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

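    # Descriptive note matching the tuple built above (not original text):
    # an index entry packs, in order, the data offset fused with the flags
    # (via offset_type), the compressed delta length, the uncompressed text
    # length, the delta base revision, the linkrev, both parent revisions,
    # the node, and the sidedata offset/size pair.
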
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

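    # Worked example for the revlog-v2 branch above (illustrative only): if
    # rev 5's data ends at byte 1000 but rev 4's sidedata was rewritten to
    # bytes 1000-1100 within the same transaction, then entry[8] + entry[9]
    # for rev 4 is 1100 and the scan returns 1100 rather than self.end(5).
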
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self._indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self._indexfile, isize)
            transaction.add(self._datafile, end)
            dfh = self._datafp(b"a+")

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()
        return not empty

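    # Descriptive note (not original text): each element of `deltas` is the
    # 8-tuple (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    # unpacked at the top of the loop; the delta applies against
    # `deltabase`, which must already exist in this revlog, hence the
    # has_node() checks before _addrevision() is called.
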
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

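    # Worked example for the truncation above (illustrative only): in a
    # non-inline revlog, stripping at rev r truncates the data file at
    # self.start(r) and the index at r * entry_size; in an inline revlog
    # both live in the .i file, so the cutoff is start(r) + r * entry_size,
    # accounting for the index entry stored ahead of each data chunk.
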
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

2690 def clone(
2693 def clone(
2691 self,
2694 self,
2692 tr,
2695 tr,
2693 destrevlog,
2696 destrevlog,
2694 addrevisioncb=None,
2697 addrevisioncb=None,
2695 deltareuse=DELTAREUSESAMEREVS,
2698 deltareuse=DELTAREUSESAMEREVS,
2696 forcedeltabothparents=None,
2699 forcedeltabothparents=None,
2697 sidedata_helpers=None,
2700 sidedata_helpers=None,
2698 ):
2701 ):
2699 """Copy this revlog to another, possibly with format changes.
2702 """Copy this revlog to another, possibly with format changes.
2700
2703
2701 The destination revlog will contain the same revisions and nodes.
2704 The destination revlog will contain the same revisions and nodes.
2702 However, it may not be bit-for-bit identical due to e.g. delta encoding
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When unset, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

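        # Each reuse policy maps onto the two laziness switches below:
        # DELTAREUSEALWAYS reuses both the cached delta and its base,
        # DELTAREUSESAMEREVS reuses the delta but lets the destination re-pick
        # the base, and DELTAREUSENEVER recomputes everything.
        # DELTAREUSEFULLADD leaves both switches untouched; it is handled
        # per-revision in _clone().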
        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
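        # Raw index entries are tuples; the fields used below are:
        #   entry[0]: offset (high bits) and storage flags (low 16 bits)
        #   entry[4]: linkrev
        #   entry[5], entry[6]: parent revisions (mapped to nodes via
        #                       index[parent_rev][7])
        #   entry[7]: node id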
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

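                # No reusable delta: fall back to the full raw text, and make
                # sure sidedata is resolved either way.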
                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                ifh = destrevlog.opener(
                    destrevlog._indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog._datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
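        # packmeta wraps the tombstone in the metadata envelope (the b'\1\n'
        # header described in filelog.py), so readers recognize the stored
        # revision as censored content rather than ordinary file data.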

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
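        # The replacement revlog must parse byte-for-byte like the original,
        # so mirror the source's format version, feature flags, and index
        # parser onto it.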
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                    state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
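        # Only the requested categories are computed; e.g. a hypothetical
        # storageinfo(trackedsize=True) call returns
        # {b'trackedsize': <total raw bytes>} and nothing else.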
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only operation).
        # See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that assumes
        # all index data is written in append mode and is therefore
        # incompatible with the seeked write done in this method. The use of
        # such "delayed" writing will soon be removed for revlog versions that
        # support side data, so for now we only keep this simple assert to
        # highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
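                # entry[8] and entry[9] hold the existing sidedata offset and
                # length; non-zero values mean this revision already has
                # sidedata on disk.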
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'r+') as fp:
            fp.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0:
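                    # The first index entry embeds the revlog header
                    # (format flags | version) in its leading bytes, so it
                    # must be re-packed whenever entry 0 is rewritten.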
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                fp.write(packed)