revlog: define the actual index and datafile at loading time...
marmoute
r47939:cacb08f3 default
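
The hunks below move the postfix-to-filename resolution out of __init__ and
into _loadindex(), so the concrete index and data file names are recomputed
every time the index is (re)loaded instead of being fixed at construction.
A minimal sketch of that mapping (illustrative only; _resolve_files is a
hypothetical helper and the radix values are made up):

    def _resolve_files(radix, postfix):
        # mirrors the logic the patch adds to _loadindex()
        if postfix is None:
            return b'%s.i' % radix, b'%s.d' % radix
        elif postfix == b'a':
            # separate index variant that shares the main data file
            return b'%s.i.a' % radix, b'%s.d' % radix
        else:
            return b'%s.i.%s' % (radix, postfix), b'%s.d.%s' % (radix, postfix)

    assert _resolve_files(b'data/foo', None) == (b'data/foo.i', b'data/foo.d')
    assert _resolve_files(b'data/foo', b'a') == (b'data/foo.i.a', b'data/foo.d')
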
@@ -1,3173 +1,3177 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
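
# Illustration (values arbitrary): offset_type() packs the data-file offset
# into the high bits and the 16 flag bits into the low bits of a single
# integer, which start() and flags() later unpack:
#
#     packed = offset_type(1024, 0)   # 1024 << 16 == 67108864
#     packed >> 16                    # -> 1024  (see start())
#     packed & 0xFFFF                 # -> 0     (see flags())
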
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance-measurement code might
        not set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

-        if postfix is None:
-            indexfile = b'%s.i' % self.radix
-            datafile = b'%s.d' % self.radix
-        elif postfix == b'a':
-            indexfile = b'%s.i.a' % self.radix
-            datafile = b'%s.d' % self.radix
-        else:
-            indexfile = b'%s.i.%s' % (self.radix, postfix)
-            datafile = b'%s.d.%s' % (self.radix, postfix)
-
-        self._indexfile = indexfile
-        self._datafile = datafile
+        self._indexfile = None
+        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
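        # Illustration (values arbitrary): a positive power of two has
        # exactly one bit set, so the check above relies on
        # n & (n - 1) == 0 holding only for powers of two:
        #   65536 & 65535 == 0       -> accepted
        #   65537 & 65536 == 65536   -> rejected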
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return newversionflags, mmapindexthreshold, force_nodemap

    def _loadindex(self):

        newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
+
+        if self.postfix is None:
+            index_file = b'%s.i' % self.radix
+            data_file = b'%s.d' % self.radix
+        elif self.postfix == b'a':
+            index_file = b'%s.i.a' % self.radix
+            data_file = b'%s.d' % self.radix
+        else:
+            index_file = b'%s.i.%s' % (self.radix, self.postfix)
+            data_file = b'%s.d.%s' % (self.radix, self.postfix)
+
+        self._indexfile = index_file
+        self._datafile = data_file
+
        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        flags = self._format_flags = versionflags & ~0xFFFF
        fmt = self._format_version = versionflags & 0xFFFF

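        # Illustration (assuming INDEX_HEADER unpacks a big-endian 32-bit
        # integer): a v1 inline revlog index starts with 0x00010001, so
        #   0x00010001 & 0xFFFF  == 1        -> fmt: REVLOGV1
        #   0x00010001 & ~0xFFFF == 0x10000  -> flags: FLAG_INLINE_DATA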
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.display_id)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.display_id)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.display_id)
                )

            # There is a bug in the transaction handling when going from an
            # inline revlog to a separate index and data file. Turn it off until
            # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
            # See issue6485
            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.display_id)
            )

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif fmt == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self._indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

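    # Usage sketch (comment only; `offset` and `length` are illustrative):
    # chunk readers do roughly
    #
    #     with self._datareadfp() as fp:
    #         fp.seek(offset)
    #         data = fp.read(length)
    #
    # reusing an active write handle when one exists, so reads issued during
    # a transaction can see data that was just appended.
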
    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

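    # For reference (comment derived from the accessors in this class): a
    # parsed index entry is addressed positionally:
    #   e[0] offset<<16 | flags    e[4] linkrev
    #   e[1] compressed length     e[5] first parent rev
    #   e[2] uncompressed length   e[6] second parent rev
    #   e[3] delta-chain base rev  e[7] node id
    #   e[9] sidedata length (only meaningful when hassidedata)
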
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

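    # Sketch (comment only, revs illustrative): for a delta chain
    # 0 <- 2 <- 5, _chaininfo(5) walks the e[3] links and returns
    # (2, len(delta 2->5) + len(delta 0->2) + len(text of base rev 0)),
    # memoizing the pair for the queried rev.
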
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

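    # Sketch (comment only, revs illustrative): with a delta chain
    # 0 <- 2 <- 5, _deltachain(5) returns ([0, 2, 5], False), while
    # _deltachain(5, stoprev=2) returns ([5], True): the stop revision is
    # excluded and the caller restores the text on top of rev 2 instead of
    # replaying the chain from its base.
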
908 def ancestors(self, revs, stoprev=0, inclusive=False):
912 def ancestors(self, revs, stoprev=0, inclusive=False):
909 """Generate the ancestors of 'revs' in reverse revision order.
913 """Generate the ancestors of 'revs' in reverse revision order.
910 Does not generate revs lower than stoprev.
914 Does not generate revs lower than stoprev.
911
915
912 See the documentation for ancestor.lazyancestors for more details."""
916 See the documentation for ancestor.lazyancestors for more details."""
913
917
914 # first, make sure start revisions aren't filtered
918 # first, make sure start revisions aren't filtered
915 revs = list(revs)
919 revs = list(revs)
916 checkrev = self.node
920 checkrev = self.node
917 for r in revs:
921 for r in revs:
918 checkrev(r)
922 checkrev(r)
919 # and we're sure ancestors aren't filtered as well
923 # and we're sure ancestors aren't filtered as well
920
924
921 if rustancestor is not None:
925 if rustancestor is not None:
922 lazyancestors = rustancestor.LazyAncestors
926 lazyancestors = rustancestor.LazyAncestors
923 arg = self.index
927 arg = self.index
924 else:
928 else:
925 lazyancestors = ancestor.lazyancestors
929 lazyancestors = ancestor.lazyancestors
926 arg = self._uncheckedparentrevs
930 arg = self._uncheckedparentrevs
927 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
931 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
928
932
929 def descendants(self, revs):
933 def descendants(self, revs):
930 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
934 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
931
935
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

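    # Usage sketch with hypothetical nodes n1 and n2: the first element
    # answers membership lazily (it holds revision numbers), the second is a
    # fully materialized, topologically sorted list of nodes.
    #
    #     has, missing = self.findcommonmissing(common=[n1], heads=[n2])
    #     self.rev(n1) in has    # True: common and its ancestors are in `has`
    #     # `missing` is (::n2) - (::n1), oldest revision first
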
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

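    # Note on the two variants above: findmissing() is the node-id flavour of
    # findmissingrevs(); both delegate to incrementalmissingrevs(), so for
    # matching inputs the results line up as
    #
    #     [self.node(r) for r in self.findmissingrevs(cr, hr)]
    #     == self.findmissing(cn, hn)
    #
    # where cr/hr are the revision-number forms of the nodes cn/hn.
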
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

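    # Usage sketch with hypothetical nodes n1 and n2: select the DAG slice
    # between them, inclusively, in topological order.
    #
    #     nodes, outroots, outheads = self.nodesbetween([n1], [n2])
    #     # nodes    : every N with n1::N and N::n2, sorted by revision number
    #     # outroots : the subset of [n1] from which something was reachable
    #     # outheads : the subset of [n2] actually reached from the roots
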
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no rev is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

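    # Reading the pure-Python fallback above: e[5] and e[6] are an index
    # entry's parent revisions, so every rev named as a parent is unmarked.
    # The extra slot in `ishead` absorbs writes for nullrev (-1) parents, and
    # the final enumerate() keeps only the revs still flagged as heads.
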
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

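    # Worked reading of the reduction above: isancestorrev(a, b) computes
    # heads(::({a} and {a}::{b})), which is non-empty exactly when some path
    # from a reaches b, i.e. when a is an ancestor of b. The `a > b` fast
    # path is sound because in a revlog a parent's revision number is always
    # smaller than its child's.
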
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

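    # Resolution order sketch: lookup() first tries the exact forms handled
    # by _match() (integer rev, binary node, str(rev), full hex node), then
    # falls back to unique-prefix matching via _partialmatch(); an ambiguous
    # prefix raises error.AmbiguousPrefixLookupError rather than guessing.
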
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

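    # Usage sketch (hypothetical node whose hex form is unique after three
    # digits); this is the kind of lookup backing the `shortest` template
    # function:
    #
    #     self.shortest(node)               # -> b'abc'
    #     self.shortest(node, minlength=6)  # -> b'abcdef', floor respected
    #
    # all-'f' prefixes are extended so a result never collides with wdir.
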
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

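    # Worked example for the window arithmetic above, assuming a 64KiB
    # _chunkcachesize (the masking requires a power of two):
    #
    #     offset, length = 70000, 10000
    #     realoffset = 70000 & ~0xFFFF                               # 65536
    #     reallength = ((70000 + 10000 + 65536) & ~0xFFFF) - 65536   # 65536
    #
    # one aligned 64KiB read at 65536 then covers the request plus nearby
    # data on both sides, which helps backward reads hit the cache.
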
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is used, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

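    # Offset notes for the computation above: istart[0] packs the data offset
    # and the flags into one integer, hence the `>> 16`. For inline revlogs
    # the data of revision N is preceded by N + 1 index entries, which is why
    # both bounds are shifted by (rev + 1) * entry_size.
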
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

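    # Reading the sparse-revlog fallback above: a revision is a snapshot when
    # its delta base is not one of its parents. Intermediate snapshots are
    # themselves deltas against earlier snapshots, so the recursion on `base`
    # terminates at a full snapshot (base == rev or base == nullrev).
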
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

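    # Illustrative sketch (not part of the API): for an open revlog `rl`
    # from this module, the two access paths above differ only in flag
    # processing:
    #
    #   text, sidedata = rl._revisiondata(0)           # flag processors run
    #   raw, sidedata = rl._revisiondata(0, raw=True)  # stored bytes, as-is
    #
    # For censored or extstored revisions the two results differ; for
    # revisions carrying only REVIDX_DEFAULT_FLAGS they are identical.
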
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

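    # A minimal sketch of the reconstruction above, assuming a delta chain
    # [base, d1, d2] returned by _deltachain(): the base text is patched
    # with each delta in order.
    #
    #   chain, stopped = rl._deltachain(rev)
    #   chunks = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])
    #
    # When `stopped` is true the cached revision serves as the base instead,
    # so chunks[0] is itself a delta rather than a full text.
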
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

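    # Offset arithmetic sketch: in an inline revlog the data segments are
    # interleaved with the index entries, so a data-relative offset has to
    # be shifted by the size of the index entries written before (and
    # including) this revision:
    #
    #   physical = sidedata_offset + index.entry_size * (1 + rev)
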
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

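    # For reference, storageutil.hashrevisionsha1 computes the node as the
    # SHA-1 of the two parent nodes (lowest first) followed by the text,
    # roughly:
    #
    #   a, b = sorted([p1, p2])
    #   node = hashlib.sha1(a + b + text).digest()
    #
    # Subclasses that override hash() must keep checkhash() consistent.
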
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self._indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

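    # Layout sketch: an inline revlog interleaves index entries and data
    # chunks in the .i file; the conversion above rewrites that into the
    # usual split layout (hypothetical file names):
    #
    #   foo.i (inline: entries + data)  ->  foo.i (entries) + foo.d (data)
    #
    # `trindex` records the first revision touched by the transaction so the
    # rewritten index can be truncated correctly on rollback.
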
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

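    # Usage sketch (illustrative; `tr` is an open transaction and `text`
    # the full revision to store):
    #
    #   rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # Note the return value: because the index lookup above short-circuits
    # on a known node, addrevision() returns the revision number of an
    # existing entry rather than adding a duplicate.
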
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

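    # Header convention sketch, mirrored by decompress() below. The first
    # return value is prepended to the stored chunk, so:
    #
    #   (b'', b'x...')  -> compressed data; the engine wrote its own header
    #   (b'u', text)    -> stored literally, b'u' marks "uncompressed"
    #   (b'', b'\0...') -> text already starts with NUL, stored as-is
    #
    # This keeps the common compressed case free of any extra bytes.
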
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

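    # Index entry layout, matching the 10-tuple appended above:
    #
    #   e = (offset_type(offset, flags),  # data offset packed with flags
    #        deltainfo.deltalen,          # length of the stored (delta) chunk
    #        textlen,                     # uncompressed full-text length
    #        deltainfo.base,              # revision the delta applies to
    #        link, p1r, p2r,              # linkrev and parent revisions
    #        node,                        # node id
    #        sidedata_offset,             # 0 when there is no sidedata
    #        len(serialized_sidedata))    # sidedata length in bytes
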
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self._indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self._indexfile, isize)
            transaction.add(self._datafile, end)
            dfh = self._datafp(b"a+")

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.radix, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.display_id, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.display_id, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()
        return not empty

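    # Each element of `deltas` above is an 8-tuple, as seen in the unpacking
    # at the top of the loop:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # where `delta` is a binary diff against `deltabase`, which must already
    # be present in the revlog (or be the previously added revision).
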
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

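    # Truncation sketch for the offsets computed above, stripping at
    # revision `rev`:
    #
    #   split:  .d truncated at start(rev); .i at rev * entry_size
    #   inline: .i truncated at start(rev) + rev * entry_size
    #
    # i.e. an inline revlog must account for both the data chunks and the
    # index entries that precede the stripped revision.
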
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents`` argument
        controls whether to force compute deltas against both parents for
        merges. When unset, the destination revlog's existing setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False
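            # Summary of the mapping implied above:
            #
            #   policy              _lazydelta  _lazydeltabase
            #   DELTAREUSEALWAYS    True        True
            #   DELTAREUSESAMEREVS  True        False
            #   DELTAREUSENEVER     False       False
            #
            # DELTAREUSEFULLADD leaves both flags untouched; its branch in
            # _clone below re-adds revisions as new content instead.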

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
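            # (Index entry layout, as relied on above: entry[0] packs the
            # data offset and the flags, entry[4] is the linkrev, entry[5]
            # and entry[6] are the parent revisions, and entry[7] is the
            # node.)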

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]
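                    # Since `&` binds tighter than `|`, the line above reads
                    # as `flags | (new_flags[0] & ~new_flags[1])`: new_flags
                    # is a (flags to add, flags to remove) pair.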

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                ifh = destrevlog.opener(
                    destrevlog._indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog._datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)
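            # `addrevisioncb` sketch (hypothetical callback, not from this
            # file): a progress hook compatible with the call above could be
            #
            #     def onrev(store, rev, node):
            #         progress.increment()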
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

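    # Usage sketch for the method above (hypothetical `repo` and `badnode`;
    # filelogs expose this same call by delegating to their revlog):
    #
    #     with repo.transaction(b'censor') as tr:
    #         fl = repo.file(b'secrets.txt')
    #         fl.censorrevision(tr, badnode, tombstone=b'redacted')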
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #     meta: file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #      ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
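        # Usage sketch (hypothetical `rl`): asking only for the size numbers:
        #
        #     info = rl.storageinfo(trackedsize=True, storedsize=True)
        #     # info[b'trackedsize'] sums the raw content sizes, while
        #     # info[b'storedsize'] is the on-disk footprint of the files.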

    def rewrite_sidedata(self, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when being rewritten (since it's not an append-only
        # operation). See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that assumes
        # all index data is written in append mode, which is incompatible
        # with the seeked writes done in this method. Such "delayed" writing
        # will soon be removed for revlog versions that support sidedata, so
        # for now we only keep this simple check to highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))
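                # (entry[1:8] keeps the pre-existing index fields; the two
                # values appended here land in slots 8 and 9, the sidedata
                # offset and length checked at the top of this loop.)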

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'r+') as fp:
            fp.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                fp.write(packed)