##// END OF EJS Templates
revlog: introduce a `display_id` property...
marmoute -
r47924:fbf38517 default
parent child Browse files
Show More
@@ -1,3173 +1,3180 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
58 )
59 from .thirdparty import attr
59 from .thirdparty import attr
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 error,
63 error,
64 mdiff,
64 mdiff,
65 policy,
65 policy,
66 pycompat,
66 pycompat,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .interfaces import (
70 from .interfaces import (
71 repository,
71 repository,
72 util as interfaceutil,
72 util as interfaceutil,
73 )
73 )
74 from .revlogutils import (
74 from .revlogutils import (
75 deltas as deltautil,
75 deltas as deltautil,
76 flagutil,
76 flagutil,
77 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
# Blank usage of all the names below to prevent pyflakes complaints about
# unused imports.
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
# Implementation modules are resolved through ``policy``. The rust modules
# may be None (this file tests ``rustrevlog is not None`` before using it).
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')
111
111
# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """Read-side flag processor for REVIDX_ELLIPSIS.

    ``rl`` is accepted but unused; ``text`` is handed back untouched.

    Returns the 2-tuple ``(text, False)``.
    NOTE(review): the second element is presumably the "validate hash"
    indicator expected by the flag processor protocol -- confirm against
    flagutil.
    """
    return text, False
122
122
123
123
def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor for REVIDX_ELLIPSIS.

    ``rl`` is accepted but unused; ``text`` is handed back untouched.

    Returns the 2-tuple ``(text, False)``.
    NOTE(review): the second element is presumably the "validate hash"
    indicator expected by the flag processor protocol -- confirm against
    flagutil.
    """
    return text, False
126
126
127
127
def ellipsisrawprocessor(rl, text):
    """Raw flag processor for REVIDX_ELLIPSIS.

    Both arguments are ignored; the function always returns ``False``.
    """
    return False
130
130
131
131
# (read, write, raw) processors for REVIDX_ELLIPSIS, in that order.
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
137
137
138
138
def offset_type(offset, type):
    """Pack a data offset and revision flags into a single integer.

    ``offset`` is coerced to ``int`` and shifted left by 16 bits; ``type``
    (the flag bits) is OR-ed into the result.

    Raises ValueError when ``type`` has any bit set outside
    ``flagutil.REVIDX_KNOWN_FLAGS``.
    """
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
143
143
144
144
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation.

    When ``skipflags`` is truthy the node is not read; it is only recorded in
    the ``state[b'skipread']`` set. Otherwise ``rl.revision(node)`` is called
    and its result discarded.
    """
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)
153
153
154
154
# True if a fast implementation for persistent-nodemap is available
#
# We also consider that we have a "fast" implementation in "pure" python
# because people using pure don't really have performance considerations (and
# have a wheelbarrow of other sources of slowness)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)
163
163
164
164
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    NOTE(review): presumably the length of the fulltext --
                confirm against callers
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
184
184
185
185
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """Plain attribute container declared as implementing
    ``repository.irevisiondelta``.

    The meaning of each attribute is defined by that interface. Every field
    is required at construction time except ``linknode``, which defaults to
    None.
    """

    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)
200
200
201
201
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """Immutable record declared as implementing
    ``repository.iverifyproblem``.

    All three fields (``warning``, ``error``, ``node``) default to None.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
208
208
209
209
def parse_index_v1(data, inline):
    """Parse raw index ``data`` and return an ``(index, cache)`` pair.

    The work is delegated to ``parsers.parse_index2(data, inline)``.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache
214
214
215
215
def parse_index_v2(data, inline):
    """Parse raw revlog-v2 index ``data`` and return an ``(index, cache)``
    pair.

    Same as ``parse_index_v1`` except that ``revlogv2=True`` is passed to
    ``parsers.parse_index2``.
    """
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache
220
220
221
221
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        """Parse raw index ``data`` through
        ``parsers.parse_index_devel_nodemap`` and return an
        ``(index, cache)`` pair."""
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    # The loaded ``parsers`` module has no devel nodemap parser; users of
    # this name have to check for None before calling it.
    parse_index_v1_nodemap = None
231
231
232
232
def parse_index_v1_mixed(data, inline):
    """Parse raw index ``data`` like ``parse_index_v1`` and wrap the
    resulting index in ``rustrevlog.MixedIndex``.

    Returns a ``(mixed_index, cache)`` pair. ``rustrevlog`` must not be None
    when this is called.
    """
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache
236
236
237
237
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF
241
241
242
242
243 class revlog(object):
243 class revlog(object):
244 """
244 """
245 the underlying revision storage object
245 the underlying revision storage object
246
246
247 A revlog consists of two parts, an index and the revision data.
247 A revlog consists of two parts, an index and the revision data.
248
248
249 The index is a file with a fixed record size containing
249 The index is a file with a fixed record size containing
250 information on each revision, including its nodeid (hash), the
250 information on each revision, including its nodeid (hash), the
251 nodeids of its parents, the position and offset of its data within
251 nodeids of its parents, the position and offset of its data within
252 the data file, and the revision it's based on. Finally, each entry
252 the data file, and the revision it's based on. Finally, each entry
253 contains a linkrev entry that can serve as a pointer to external
253 contains a linkrev entry that can serve as a pointer to external
254 data.
254 data.
255
255
256 The revision data itself is a linear collection of data chunks.
256 The revision data itself is a linear collection of data chunks.
257 Each chunk represents a revision and is usually represented as a
257 Each chunk represents a revision and is usually represented as a
258 delta against the previous chunk. To bound lookup time, runs of
258 delta against the previous chunk. To bound lookup time, runs of
259 deltas are limited to about 2 times the length of the original
259 deltas are limited to about 2 times the length of the original
260 version data. This makes retrieval of a version proportional to
260 version data. This makes retrieval of a version proportional to
261 its size, or O(1) relative to the number of revisions.
261 its size, or O(1) relative to the number of revisions.
262
262
263 Both pieces of the revlog are written to in an append-only
263 Both pieces of the revlog are written to in an append-only
264 fashion, which means we never need to rewrite a file to insert or
264 fashion, which means we never need to rewrite a file to insert or
265 remove data, and can use some simple techniques to avoid the need
265 remove data, and can use some simple techniques to avoid the need
266 for locking while reading.
266 for locking while reading.
267
267
268 If checkambig, indexfile is opened with checkambig=True at
268 If checkambig, indexfile is opened with checkambig=True at
269 writing, to avoid file stat ambiguity.
269 writing, to avoid file stat ambiguity.
270
270
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 index will be mmapped rather than read if it is larger than the
272 index will be mmapped rather than read if it is larger than the
273 configured threshold.
273 configured threshold.
274
274
275 If censorable is True, the revlog can have censored revisions.
275 If censorable is True, the revlog can have censored revisions.
276
276
277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 compression for the data content.
278 compression for the data content.
279
279
280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 file handle, a filename, and an expected position. It should check whether
281 file handle, a filename, and an expected position. It should check whether
282 the current position in the file handle is valid, and log/warn/fail (by
282 the current position in the file handle is valid, and log/warn/fail (by
283 raising).
283 raising).
284 """
284 """
285
285
286 _flagserrorclass = error.RevlogError
286 _flagserrorclass = error.RevlogError
287
287
def __init__(
    self,
    opener,
    target,
    radix,
    postfix=None,
    checkambig=False,
    mmaplargeindex=False,
    censorable=False,
    upperboundcomp=None,
    persistentnodemap=False,
    concurrencychecker=None,
):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    `target`: a (KIND, ID) tuple that identifies the content stored in
    this revlog. It helps the rest of the code to understand what the
    revlog is about without having to resort to heuristics and index
    filename analysis. Note that this must reliably be set by normal code,
    but that test, debug, or performance measurement code might not set
    this to an accurate value.

    `radix`: bytes prefix from which the index (`<radix>.i`) and data
    (`<radix>.d`) file names are built.

    `postfix`: optional bytes suffix altering those file names. None gives
    `.i`/`.d`; b'a' gives `.i.a` for the index while the data file stays
    `.d`; any other value is appended to both (`.i.<postfix>` and
    `.d.<postfix>`).

    `persistentnodemap`: when true, `nodemap_file` is obtained from
    `nodemaputil.get_nodemap_file(self)`; otherwise it stays None.

    The remaining arguments (`checkambig`, `mmaplargeindex`, `censorable`,
    `upperboundcomp`, `concurrencychecker`) are stored on the instance.

    The index is loaded (`self._loadindex()`) before this returns.
    """
    self.upperboundcomp = upperboundcomp

    self.radix = radix

    if postfix is None:
        indexfile = b'%s.i' % self.radix
        datafile = b'%s.d' % self.radix
    elif postfix == b'a':
        indexfile = b'%s.i.a' % self.radix
        datafile = b'%s.d' % self.radix
    else:
        indexfile = b'%s.i.%s' % (self.radix, postfix)
        datafile = b'%s.d.%s' % (self.radix, postfix)

    self._indexfile = indexfile
    self._datafile = datafile
    self.nodemap_file = None
    self.postfix = postfix
    self.opener = opener
    if persistentnodemap:
        self.nodemap_file = nodemaputil.get_nodemap_file(self)

    assert target[0] in ALL_KINDS
    assert len(target) == 2
    self.target = target
    # When True, indexfile is opened with checkambig=True at writing, to
    # avoid file stat ambiguity.
    self._checkambig = checkambig
    self._mmaplargeindex = mmaplargeindex
    self._censorable = censorable
    # 3-tuple of (node, rev, text) for a raw revision.
    self._revisioncache = None
    # Maps rev to chain base rev.
    self._chainbasecache = util.lrucachedict(100)
    # 2-tuple of (offset, data) of raw data from the revlog at an offset.
    self._chunkcache = (0, b'')
    # How much data to read and cache into the raw revlog data cache.
    self._chunkcachesize = 65536
    self._maxchainlen = None
    self._deltabothparents = True
    self.index = None
    self._nodemap_docket = None
    # Mapping of partial identifiers to full nodes.
    self._pcache = {}
    # Compression defaults; _init_opts() may override them from the
    # opener options.
    self._compengine = b'zlib'
    self._compengineopts = {}
    self._maxdeltachainspan = -1
    self._withsparseread = False
    self._sparserevlog = False
    self._srdensitythreshold = 0.50
    self._srmingapsize = 262144

    # Make copy of flag processors so each revlog instance can support
    # custom flags.
    self._flagprocessors = dict(flagutil.flagprocessors)

    # 2-tuple of file handles being used for active writing.
    self._writinghandles = None

    self._loadindex()

    self._concurrencychecker = concurrencychecker
377
377
def _init_opts(self):
    """process options (from above/config) to setup associated default revlog mode

    These values might be affected when actually reading on disk information.

    Options are read from ``self.opener.options``; most of them are stored
    directly on the instance. The values below are returned, as a 3-tuple
    in this order, for use in _loadindex().

    * newversionflags:
        version header to use if we need to create a new revlog

    * mmapindexthreshold:
        minimal index size for start to use mmap (None unless
        ``self._mmaplargeindex`` is set and the option is present)

    * force_nodemap:
        force the usage of a "development" version of the nodemap code

    Raises error.RevlogError when the resulting chunk cache size is not a
    strictly positive power of 2.
    """
    mmapindexthreshold = None
    opts = self.opener.options

    if b'revlogv2' in opts:
        newversionflags = REVLOGV2 | FLAG_INLINE_DATA
    elif b'revlogv1' in opts:
        newversionflags = REVLOGV1 | FLAG_INLINE_DATA
        if b'generaldelta' in opts:
            newversionflags |= FLAG_GENERALDELTA
    elif b'revlogv0' in opts:
        newversionflags = REVLOGV0
    else:
        newversionflags = REVLOG_DEFAULT_VERSION

    if b'chunkcachesize' in opts:
        self._chunkcachesize = opts[b'chunkcachesize']
    if b'maxchainlen' in opts:
        self._maxchainlen = opts[b'maxchainlen']
    if b'deltabothparents' in opts:
        self._deltabothparents = opts[b'deltabothparents']
    self._lazydelta = bool(opts.get(b'lazydelta', True))
    # lazydeltabase is only honoured when lazydelta itself is enabled.
    self._lazydeltabase = False
    if self._lazydelta:
        self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
    if b'compengine' in opts:
        self._compengine = opts[b'compengine']
    if b'zlib.level' in opts:
        self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
    if b'zstd.level' in opts:
        self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
    if b'maxdeltachainspan' in opts:
        self._maxdeltachainspan = opts[b'maxdeltachainspan']
    if self._mmaplargeindex and b'mmapindexthreshold' in opts:
        mmapindexthreshold = opts[b'mmapindexthreshold']
    self.hassidedata = bool(opts.get(b'side-data', False))
    self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
    withsparseread = bool(opts.get(b'with-sparse-read', False))
    # sparse-revlog forces sparse-read
    self._withsparseread = self._sparserevlog or withsparseread
    if b'sparse-read-density-threshold' in opts:
        self._srdensitythreshold = opts[b'sparse-read-density-threshold']
    if b'sparse-read-min-gap-size' in opts:
        self._srmingapsize = opts[b'sparse-read-min-gap-size']
    if opts.get(b'enableellipsis'):
        self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

    # revlog v0 doesn't have flag processors
    for flag, processor in pycompat.iteritems(
        opts.get(b'flagprocessors', {})
    ):
        flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

    if self._chunkcachesize <= 0:
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not greater than 0')
            % self._chunkcachesize
        )
    elif self._chunkcachesize & (self._chunkcachesize - 1):
        # x & (x - 1) is non-zero exactly when x has more than one bit set
        raise error.RevlogError(
            _(b'revlog chunk cache size %r is not a power of 2')
            % self._chunkcachesize
        )
    force_nodemap = opts.get(b'devel-force-nodemap', False)
    return newversionflags, mmapindexthreshold, force_nodemap
458
458
459 def _loadindex(self):
459 def _loadindex(self):
460
460
461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
462 indexdata = b''
462 indexdata = b''
463 self._initempty = True
463 self._initempty = True
464 try:
464 try:
465 with self._indexfp() as f:
465 with self._indexfp() as f:
466 if (
466 if (
467 mmapindexthreshold is not None
467 mmapindexthreshold is not None
468 and self.opener.fstat(f).st_size >= mmapindexthreshold
468 and self.opener.fstat(f).st_size >= mmapindexthreshold
469 ):
469 ):
470 # TODO: should .close() to release resources without
470 # TODO: should .close() to release resources without
471 # relying on Python GC
471 # relying on Python GC
472 indexdata = util.buffer(util.mmapread(f))
472 indexdata = util.buffer(util.mmapread(f))
473 else:
473 else:
474 indexdata = f.read()
474 indexdata = f.read()
475 if len(indexdata) > 0:
475 if len(indexdata) > 0:
476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
477 self._initempty = False
477 self._initempty = False
478 else:
478 else:
479 versionflags = newversionflags
479 versionflags = newversionflags
480 except IOError as inst:
480 except IOError as inst:
481 if inst.errno != errno.ENOENT:
481 if inst.errno != errno.ENOENT:
482 raise
482 raise
483
483
484 versionflags = newversionflags
484 versionflags = newversionflags
485
485
486 flags = self._format_flags = versionflags & ~0xFFFF
486 flags = self._format_flags = versionflags & ~0xFFFF
487 fmt = self._format_version = versionflags & 0xFFFF
487 fmt = self._format_version = versionflags & 0xFFFF
488
488
489 if fmt == REVLOGV0:
489 if fmt == REVLOGV0:
490 if flags:
490 if flags:
491 raise error.RevlogError(
491 raise error.RevlogError(
492 _(b'unknown flags (%#04x) in version %d revlog %s')
492 _(b'unknown flags (%#04x) in version %d revlog %s')
493 % (flags >> 16, fmt, self._indexfile)
493 % (flags >> 16, fmt, self._indexfile)
494 )
494 )
495
495
496 self._inline = False
496 self._inline = False
497 self._generaldelta = False
497 self._generaldelta = False
498
498
499 elif fmt == REVLOGV1:
499 elif fmt == REVLOGV1:
500 if flags & ~REVLOGV1_FLAGS:
500 if flags & ~REVLOGV1_FLAGS:
501 raise error.RevlogError(
501 raise error.RevlogError(
502 _(b'unknown flags (%#04x) in version %d revlog %s')
502 _(b'unknown flags (%#04x) in version %d revlog %s')
503 % (flags >> 16, fmt, self._indexfile)
503 % (flags >> 16, fmt, self._indexfile)
504 )
504 )
505
505
506 self._inline = versionflags & FLAG_INLINE_DATA
506 self._inline = versionflags & FLAG_INLINE_DATA
507 self._generaldelta = versionflags & FLAG_GENERALDELTA
507 self._generaldelta = versionflags & FLAG_GENERALDELTA
508
508
509 elif fmt == REVLOGV2:
509 elif fmt == REVLOGV2:
510 if flags & ~REVLOGV2_FLAGS:
510 if flags & ~REVLOGV2_FLAGS:
511 raise error.RevlogError(
511 raise error.RevlogError(
512 _(b'unknown flags (%#04x) in version %d revlog %s')
512 _(b'unknown flags (%#04x) in version %d revlog %s')
513 % (flags >> 16, fmt, self._indexfile)
513 % (flags >> 16, fmt, self._indexfile)
514 )
514 )
515
515
516 # There is a bug in the transaction handling when going from an
516 # There is a bug in the transaction handling when going from an
517 # inline revlog to a separate index and data file. Turn it off until
517 # inline revlog to a separate index and data file. Turn it off until
518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
519 # See issue6485
519 # See issue6485
520 self._inline = False
520 self._inline = False
521 # generaldelta implied by version 2 revlogs.
521 # generaldelta implied by version 2 revlogs.
522 self._generaldelta = True
522 self._generaldelta = True
523
523
524 else:
524 else:
525 raise error.RevlogError(
525 raise error.RevlogError(
526 _(b'unknown version (%d) in revlog %s') % (fmt, self._indexfile)
526 _(b'unknown version (%d) in revlog %s') % (fmt, self._indexfile)
527 )
527 )
528
528
529 self.nodeconstants = sha1nodeconstants
529 self.nodeconstants = sha1nodeconstants
530 self.nullid = self.nodeconstants.nullid
530 self.nullid = self.nodeconstants.nullid
531
531
532 # sparse-revlog can't be on without general-delta (issue6056)
532 # sparse-revlog can't be on without general-delta (issue6056)
533 if not self._generaldelta:
533 if not self._generaldelta:
534 self._sparserevlog = False
534 self._sparserevlog = False
535
535
536 self._storedeltachains = True
536 self._storedeltachains = True
537
537
538 devel_nodemap = (
538 devel_nodemap = (
539 self.nodemap_file
539 self.nodemap_file
540 and force_nodemap
540 and force_nodemap
541 and parse_index_v1_nodemap is not None
541 and parse_index_v1_nodemap is not None
542 )
542 )
543
543
544 use_rust_index = False
544 use_rust_index = False
545 if rustrevlog is not None:
545 if rustrevlog is not None:
546 if self.nodemap_file is not None:
546 if self.nodemap_file is not None:
547 use_rust_index = True
547 use_rust_index = True
548 else:
548 else:
549 use_rust_index = self.opener.options.get(b'rust.index')
549 use_rust_index = self.opener.options.get(b'rust.index')
550
550
551 self._parse_index = parse_index_v1
551 self._parse_index = parse_index_v1
552 if self._format_version == REVLOGV0:
552 if self._format_version == REVLOGV0:
553 self._parse_index = revlogv0.parse_index_v0
553 self._parse_index = revlogv0.parse_index_v0
554 elif fmt == REVLOGV2:
554 elif fmt == REVLOGV2:
555 self._parse_index = parse_index_v2
555 self._parse_index = parse_index_v2
556 elif devel_nodemap:
556 elif devel_nodemap:
557 self._parse_index = parse_index_v1_nodemap
557 self._parse_index = parse_index_v1_nodemap
558 elif use_rust_index:
558 elif use_rust_index:
559 self._parse_index = parse_index_v1_mixed
559 self._parse_index = parse_index_v1_mixed
560 try:
560 try:
561 d = self._parse_index(indexdata, self._inline)
561 d = self._parse_index(indexdata, self._inline)
562 index, _chunkcache = d
562 index, _chunkcache = d
563 use_nodemap = (
563 use_nodemap = (
564 not self._inline
564 not self._inline
565 and self.nodemap_file is not None
565 and self.nodemap_file is not None
566 and util.safehasattr(index, 'update_nodemap_data')
566 and util.safehasattr(index, 'update_nodemap_data')
567 )
567 )
568 if use_nodemap:
568 if use_nodemap:
569 nodemap_data = nodemaputil.persisted_data(self)
569 nodemap_data = nodemaputil.persisted_data(self)
570 if nodemap_data is not None:
570 if nodemap_data is not None:
571 docket = nodemap_data[0]
571 docket = nodemap_data[0]
572 if (
572 if (
573 len(d[0]) > docket.tip_rev
573 len(d[0]) > docket.tip_rev
574 and d[0][docket.tip_rev][7] == docket.tip_node
574 and d[0][docket.tip_rev][7] == docket.tip_node
575 ):
575 ):
576 # no changelog tampering
576 # no changelog tampering
577 self._nodemap_docket = docket
577 self._nodemap_docket = docket
578 index.update_nodemap_data(*nodemap_data)
578 index.update_nodemap_data(*nodemap_data)
579 except (ValueError, IndexError):
579 except (ValueError, IndexError):
580 raise error.RevlogError(
580 raise error.RevlogError(
581 _(b"index %s is corrupted") % self._indexfile
581 _(b"index %s is corrupted") % self._indexfile
582 )
582 )
583 self.index, self._chunkcache = d
583 self.index, self._chunkcache = d
584 if not self._chunkcache:
584 if not self._chunkcache:
585 self._chunkclear()
585 self._chunkclear()
586 # revnum -> (chain-length, sum-delta-length)
586 # revnum -> (chain-length, sum-delta-length)
587 self._chaininfocache = util.lrucachedict(500)
587 self._chaininfocache = util.lrucachedict(500)
588 # revlog header -> revlog compressor
588 # revlog header -> revlog compressor
589 self._decompressors = {}
589 self._decompressors = {}
590
590
@util.propertycache
def revlog_kind(self):
    """Kind of this revlog: the first item of ``self.target``."""
    kind = self.target[0]
    return kind
594
594
@util.propertycache
def display_id(self):
    """Public-facing identifier of this revlog, for use in messages.

    Currently this is simply ``self.radix``.
    """
    # NOTE: a user-facing representation built from `revlog.target` might
    # be a better fit than the raw `self.radix`.
    identifier = self.radix
    return identifier
601
@util.propertycache
def _compressor(self):
    """Build the revlog compressor of the configured compression engine.

    The engine is looked up in ``util.compengines`` under
    ``self._compengine`` and is handed ``self._compengineopts``.
    """
    return util.compengines[self._compengine].revlogcompressor(
        self._compengineopts
    )
599
606
def _indexfp(self, mode=b'r'):
    """Open the revlog's index file and return the file object.

    Any mode other than ``b'r'`` forwards ``self._checkambig`` as the
    ``checkambig`` option; mode ``b'w'`` additionally asks for
    ``atomictemp``.
    """
    openargs = {'mode': mode}
    readonly = mode == b'r'
    if not readonly:
        openargs['checkambig'] = self._checkambig
        if mode == b'w':
            openargs['atomictemp'] = True
    return self.opener(self._indexfile, **openargs)
608
615
def _datafp(self, mode=b'r'):
    """Open the revlog's data file with ``mode``; return the file object."""
    open_ = self.opener
    return open_(self._datafile, mode=mode)
612
619
@contextlib.contextmanager
def _datareadfp(self, existingfp=None):
    """Context manager yielding a file object suitable to read data.

    The handle is picked in this order: ``existingfp`` when given, then
    one of ``self._writinghandles`` when set, and finally a freshly
    opened file that is closed when the context exits.  For an inline
    revlog the index handle/file is used, otherwise the data one.
    """
    # Use explicit file handle, if given.
    if existingfp is not None:
        yield existingfp
        return

    # Use a file handle being actively used for writes, if available.
    # There is some danger to doing this because reads will seek the
    # file. However, _writeentry() performs a SEEK_END before all writes,
    # so we should be safe.
    handles = self._writinghandles
    if handles:
        yield handles[0 if self._inline else 1]
        return

    # Otherwise open a new file handle.
    openfile = self._indexfp if self._inline else self._datafp
    with openfile() as fp:
        yield fp
638
645
def tiprev(self):
    """Return the number of the last index entry: ``len(self.index) - 1``."""
    count = len(self.index)
    return count - 1
641
648
def tip(self):
    """Return the node of the revision reported by ``self.tiprev()``."""
    tolookup = self.node
    return tolookup(self.tiprev())
644
651
def __contains__(self, rev):
    """Tell whether ``rev`` lies in the range ``0 <= rev < len(self)``."""
    if 0 <= rev:
        return rev < len(self)
    return False
647
654
def __len__(self):
    """Return the number of entries in ``self.index``."""
    index = self.index
    return len(index)
650
657
def __iter__(self):
    """Iterate over the revision numbers ``0 .. len(self) - 1``."""
    allrevs = pycompat.xrange(len(self))
    return iter(allrevs)
653
660
def revs(self, start=0, stop=None):
    """Iterate over the revisions of this revlog, from start to stop.

    The iteration itself is delegated to ``storageutil.iterrevs``.
    """
    total = len(self)
    return storageutil.iterrevs(total, start=start, stop=stop)
657
664
@property
def nodemap(self):
    """Deprecated accessor returning ``self.index.nodemap``.

    A deprecation warning (version ``5.3``) is emitted on every access.
    """
    util.nouideprecwarn(
        b"revlog.nodemap is deprecated, "
        b"use revlog.index.[has_node|rev|get_rev]",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap
666
673
@property
def _nodecache(self):
    """Deprecated accessor returning ``self.index.nodemap``.

    A deprecation warning (version ``5.3``) is emitted on every access.
    """
    util.nouideprecwarn(
        b"revlog._nodecache is deprecated, use revlog.index.nodemap",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap
672
679
def hasnode(self, node):
    """Tell whether ``self.rev(node)`` succeeds.

    Returns False when the lookup raises ``KeyError``; any other
    exception propagates.
    """
    try:
        self.rev(node)
    except KeyError:
        return False
    else:
        return True
679
686
def candelta(self, baserev, rev):
    """Tell whether the two revisions (baserev, rev) can be delta-ed.

    Returns False as soon as one of them carries a flag from
    ``REVIDX_RAWTEXT_CHANGING_FLAGS``, True otherwise.
    """
    # Disable delta if either rev requires a content-changing flag
    # processor (ex. LFS). This is because such flag processor can alter
    # the rawtext content that the delta will be based on, and two clients
    # could have a same revlog node with different flags (i.e. different
    # rawtext contents) and the delta could be incompatible.
    for candidate in (baserev, rev):
        if self.flags(candidate) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
    return True
692
699
def update_caches(self, transaction):
    """Refresh the persistent nodemap, if this revlog has one.

    Nothing happens when ``self.nodemap_file`` is None.  Without a
    transaction the nodemap is updated right away through
    ``nodemaputil.update_persistent_nodemap``; otherwise
    ``nodemaputil.setup_persistent_nodemap`` is called with the
    transaction.
    """
    if self.nodemap_file is None:
        return
    if transaction is not None:
        nodemaputil.setup_persistent_nodemap(transaction, self)
    else:
        nodemaputil.update_persistent_nodemap(self)
699
706
def clearcaches(self):
    """Reset the in-memory caches of this revlog and of its index.

    When the revlog is not inline, has a nodemap file and the index
    exposes ``update_nodemap_data``, the persisted nodemap data is
    reloaded afterwards (if any is available).
    """
    self._revisioncache = None
    self._chainbasecache.clear()
    self._chunkcache = (0, b'')
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()

    # The python code is the one responsible for validating the docket, we
    # end up having to refresh it here.
    if self._inline or self.nodemap_file is None:
        return
    if not util.safehasattr(self.index, 'update_nodemap_data'):
        return
    persisted = nodemaputil.persisted_data(self)
    if persisted is None:
        return
    self._nodemap_docket = persisted[0]
    self.index.update_nodemap_data(*persisted)
719
726
def rev(self, node):
    """Return the revision number ``self.index.rev`` reports for ``node``.

    When the index lookup raises ``error.RevlogError``:

    * ``error.WdirUnsupported`` is raised if ``node`` is the ``wdirid``
      or one of the ``wdirfilenodeids`` of ``self.nodeconstants``;
    * ``error.LookupError`` is raised otherwise.

    A ``TypeError`` from the lookup is re-raised untouched.
    """
    try:
        return self.index.rev(node)
    except TypeError:
        # handled ahead of the RevlogError clause and passed through as is
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        constants = self.nodeconstants
        is_wdir = (
            node == constants.wdirid or node in constants.wdirfilenodeids
        )
        if is_wdir:
            raise error.WdirUnsupported
        raise error.LookupError(node, self._indexfile, _(b'no node'))
733
740
734 # Accessors for index entries.
741 # Accessors for index entries.
735
742
# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
# are flags.
def start(self, rev):
    """Return the offset part of the first index field of ``rev``.

    The offset is what remains of that field once the low 16 bits (the
    flags) are shifted out.
    """
    offset_flags = self.index[rev][0]
    return int(offset_flags >> 16)
740
747
def flags(self, rev):
    """Return the low 16 bits (flags) of the first index field of ``rev``."""
    offset_flags = self.index[rev][0]
    return offset_flags & 0xFFFF
743
750
def length(self, rev):
    """Return index field 1 of ``rev`` (added to ``start`` by ``end``)."""
    entry = self.index[rev]
    return entry[1]
746
753
def sidedata_length(self, rev):
    """Return index field 9 of ``rev``, or 0 without ``self.hassidedata``."""
    return self.index[rev][9] if self.hassidedata else 0
751
758
def rawsize(self, rev):
    """Return the length of the uncompressed text for a given revision.

    Index field 2 is used when it is not negative; otherwise the raw
    data is fetched with ``self.rawdata`` and measured.
    """
    recorded = self.index[rev][2]
    if recorded < 0:
        return len(self.rawdata(rev))
    return recorded
760
767
def size(self, rev):
    """Length of non-raw text (processed by a "read" flag processor)."""
    revflags = self.flags(rev)
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize. note: ELLIPSIS is known to not change the content.
    content_changing = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
    if revflags & content_changing:
        return len(self.revision(rev, raw=False))
    return self.rawsize(rev)
770
777
def chainbase(self, rev):
    """Return the revision at the bottom of the base chain of ``rev``.

    Index field 3 is followed from entry to entry until an entry names
    itself as its base.  Results are memoized in
    ``self._chainbasecache``.
    """
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    index = self.index
    current = rev
    while True:
        base = index[current][3]
        if base == current:
            break
        current = base

    self._chainbasecache[rev] = base
    return base
785
792
def linkrev(self, rev):
    """Return index field 4 (the linkrev) of ``rev``."""
    entry = self.index[rev]
    return entry[4]
788
795
def parentrevs(self, rev):
    """Return the two parent revisions of ``rev`` (index fields 5 and 6).

    When field 5 is ``nullrev`` the pair is swapped, so that field 6 is
    returned first.  An unknown ``rev`` raises ``IndexError``, except
    for ``wdirrev`` which raises ``error.WdirUnsupported``.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    first, second = entry[5], entry[6]
    if first == nullrev:
        return second, first
    return first, second


# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs
803
810
def node(self, rev):
    """Return index field 7 (the node) of ``rev``.

    An ``IndexError`` during the lookup is re-raised, except when
    ``rev`` is ``wdirrev``, which raises ``error.WdirUnsupported``.
    """
    try:
        nodeid = self.index[rev][7]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    else:
        return nodeid
811
818
812 # Derived from index values.
819 # Derived from index values.
813
820
def end(self, rev):
    """Return ``self.start(rev) + self.length(rev)``."""
    begin = self.start(rev)
    span = self.length(rev)
    return begin + span
816
823
def parents(self, node):
    """Return the parent nodes of ``node`` as a 2-tuple.

    This is the node-level counterpart of ``parentrevs`` and orders the
    parents the same way: when the first parent field of the entry is
    ``nullrev`` the pair is swapped, so that a null first parent is
    reported second.

    Raises whatever ``self.rev(node)`` raises for an unknown node.
    """
    index = self.index
    entry = index[self.rev(node)]
    # Fields 5 and 6 hold parent *revision numbers* (they are used as
    # index keys right below), so the null check has to be made against
    # ``nullrev``.  Comparing with the ``nullid`` node never matched and
    # left the swap branch dead, unlike parentrevs().
    # inline node() to avoid function call overhead
    if entry[5] == nullrev:
        return index[entry[6]][7], index[entry[5]][7]
    return index[entry[5]][7], index[entry[6]][7]
825
832
def chainlen(self, rev):
    """Return the first item (chain length) of ``self._chaininfo(rev)``."""
    info = self._chaininfo(rev)
    return info[0]
828
835
def _chaininfo(self, rev):
    """Return ``(chain-length, sum-delta-length)`` for ``rev``.

    The chain is walked from ``rev`` down to its base: through index
    field 3 when ``self._generaldelta`` is set, one revision at a time
    otherwise.  Field 1 of every visited entry is summed.  The walk is
    cut short when a revision already present in
    ``self._chaininfocache`` is reached; the result is stored in that
    cache.
    """
    cache = self._chaininfocache
    if rev in cache:
        return cache[rev]

    index = self.index
    follow_base = self._generaldelta
    current = rev
    entry = index[current]
    length = 0
    deltatotal = 0
    while True:
        if current == entry[3]:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            deltatotal += entry[1]
            break
        length += 1
        deltatotal += entry[1]
        current = entry[3] if follow_base else current - 1
        if current in cache:
            known = cache[current]
            length += known[0]
            deltatotal += known[1]
            break
        entry = index[current]

    info = (length, deltatotal)
    cache[rev] = info
    return info
859
866
def _deltachain(self, rev, stoprev=None):
    """Obtain the delta chain for a revision.

    ``stoprev`` specifies a revision to stop at. If not specified, we
    stop at the base of the chain.

    Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
    revs in ascending order and ``stopped`` is a bool indicating whether
    ``stoprev`` was hit.
    """
    # Try C implementation.
    try:
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        # fall back to the pure Python walk below
        pass

    # Aliases to prevent attribute lookup in tight loop.
    index = self.index
    follow_base = self._generaldelta

    walked = []
    current = rev
    entry = index[current]
    while not (current == entry[3] or current == stoprev):
        walked.append(current)
        current = entry[3] if follow_base else current - 1
        entry = index[current]

    stopped = current == stoprev
    if not stopped:
        # the chain base itself is part of the chain
        walked.append(current)

    walked.reverse()
    return walked, stopped
900
907
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    revs = list(revs)
    for startrev in revs:
        self.node(startrev)
    # and we're sure ancestors aren't filtered as well

    # the Rust implementation, when available, walks the index directly
    if rustancestor is None:
        factory = ancestor.lazyancestors
        source = self._uncheckedparentrevs
    else:
        factory = rustancestor.LazyAncestors
        source = self.index
    return factory(source, revs, stoprev=stoprev, inclusive=inclusive)
921
928
def descendants(self, revs):
    """Return the descendants of ``revs`` per ``dagop.descendantrevs``."""
    revsfn, parentsfn = self.revs, self.parentrevs
    return dagop.descendantrevs(revs, revsfn, parentsfn)
924
931
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        """Lazy collection of values plus a set of explicit additions."""

        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            if value in self.addedvalues:
                return True
            return value in self.lazyvalues

        def __iter__(self):
            explicit = self.addedvalues
            for value in explicit:
                yield value
            for value in self.lazyvalues:
                if value not in explicit:
                    yield value

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(commonrevs))
    has.add(nullrev)
    has.update(commonrevs)

    # take all ancestors from heads that aren't in has
    missing = set()
    pending = collections.deque(r for r in headrevs if r not in has)
    while pending:
        current = pending.popleft()
        if current in missing:
            continue
        missing.add(current)
        pending.extend(
            p for p in self.parentrevs(current) if p not in has
        )
    return has, [self.node(miss) for miss in sorted(missing)]
988
995
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object, or a rustancestor.MissingAncestors one when the Rust
    implementation is available.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    bases = [nullrev] if common is None else common

    if rustancestor is None:
        return ancestor.incrementalmissingancestors(self.parentrevs, bases)
    return rustancestor.MissingAncestors(self.index, bases)
1004
1011
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers.  If heads is
    not supplied, uses all of the revlog's head revisions.  If common is
    not supplied, uses nullrev."""
    bases = [nullrev] if common is None else common
    tips = self.headrevs() if heads is None else heads

    tracker = self.incrementalmissingrevs(common=bases)
    return tracker.missingancestors(tips)
1028
1035
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    # the computation itself works on revision numbers
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    tracker = self.incrementalmissingrevs(common=commonrevs)
    missingrevs = tracker.missingancestors(headrevs)
    return [self.node(r) for r in missingrevs]
1054
1061
def nodesbetween(self, roots=None, heads=None):
    """Compute the topological path leading from 'roots' to 'heads'.

    The result is a tuple ``(nodes, outroots, outheads)``. 'nodes' is a
    topologically sorted list holding every node N such that:

    1. N descends from at least one node of 'roots'
    2. N is an ancestor of at least one node of 'heads'

    A node counts as its own ancestor and its own descendant, so each
    reachable member of 'roots' and 'heads' shows up in 'nodes'.

    'outroots' lists the members of 'roots' that are reachable (i.e. that
    appear in 'nodes'); 'outheads' is the equivalent for 'heads'.

    Both arguments are iterables of node IDs. When 'roots' is omitted,
    nullid is the single root. When 'heads' is omitted, all heads of the
    revlog are used. An empty 'roots' or 'heads' yields three empty lists.
    """
    empty = ([], [], [])
    if roots is None:
        # Every node descends from nullid.
        roots = [self.nullid]
        lowestrev = nullrev
    else:
        roots = list(roots)
        if not roots:
            return empty
        lowestrev = min(self.rev(n) for n in roots)

    if lowestrev == nullrev and heads is None:
        # Nothing restricts the walk: the answer is the whole revlog.
        return (
            [self.node(r) for r in self],
            [self.nullid],
            list(self.heads()),
        )

    if heads is None:
        # Everything is an ancestor, so the last revision bounds the walk.
        highestrev = len(self) - 1
        # ``None`` is the marker for "every node is an ancestor".
        ancestors = None
        # The real heads get discovered during the forward pass below.
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return empty
        ancestors = set()
        # Map each requested head to a "reached from a root" flag. Entries
        # are dropped when a head turns out to be an ancestor of another
        # head ('fake' head), and unreached ones are filtered at the end.
        heads = dict.fromkeys(heads, False)
        # Walk down from the heads, tagging parents until nothing is left.
        pending = set(heads)
        # The highest head is the upper bound of the forward pass.
        highestrev = max(self.rev(n) for n in pending)
        while pending:
            n = pending.pop()
            # nullid is never tagged.
            if n == self.nullid:
                continue
            # Revision numbers give the position of a node in a
            # topological ordering, so anything below lowestrev cannot
            # descend from a root.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # First visit of a node that may descend from a root.
                    ancestors.add(n)
                    # Queue its real (non-null) parents.
                    pending.update(
                        p for p in self.parents(n) if p != self.nullid
                    )
                elif n in heads:
                    # Reached again through a child: a real head cannot
                    # be the ancestor of another head.
                    heads.pop(n)
        if not ancestors:
            return empty
        # With the ancestor set known, discard the roots outside of it.
        if lowestrev > nullrev:
            # nullid was not a root, so the lower bound may move up once
            # the unreachable roots are gone.
            roots = [root for root in roots if root in ancestors]
            if not roots:
                # No root leads to any head.
                return empty
            lowestrev = min(self.rev(root) for root in roots)
        else:
            # nullid is a root, which makes every other root irrelevant.
            lowestrev = nullrev
            roots = [self.nullid]

    # Seed the descendant set with the roots...
    descendants = set(roots)
    # ...and keep a separate copy from which roots that descend from
    # other roots ('non-real' roots) are removed.
    roots = descendants.copy()
    # Output nodes, in topological order.
    orderedout = []
    # The walk never starts below revision 0: nullid must not be emitted,
    # and having nullid in 'descendants' would make every parentless
    # revision look like a descendant.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:
            # Descending from nullid: everybody qualifies.
            isdescendant = True
        elif n in descendants:
            isdescendant = True
            # Only pre-seeded entries can be roots, hence the check lives
            # in this branch.
            if n in roots:
                parents = tuple(self.parents(n))
                # A root with a descendant parent is not a 'real' root.
                if parents[0] in descendants or parents[1] in descendants:
                    roots.remove(n)
        else:
            parents = tuple(self.parents(n))
            # A node is a descendant as soon as one parent is one (the
            # set was seeded with the roots above).
            if parents[0] in descendants or parents[1] in descendants:
                descendants.add(n)
                isdescendant = True
        if isdescendant and (ancestors is None or n in ancestors):
            # Keep nodes that are both descendants and ancestors.
            orderedout.append(n)
            if ancestors is not None and n in heads:
                # Explicit heads: record that this one was reached.
                heads[n] = True
            elif ancestors is None:
                # Discovering heads: tentatively treat this node as a
                # head (a later child will drop it again)...
                heads[n] = True
                # ...while its parents certainly are not heads.
                for p in self.parents(n):
                    heads.pop(p, None)
    heads = [head for head, flag in pycompat.iteritems(heads) if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)
1214
1221
def headrevs(self, revs=None):
    """Return the head revisions of the revlog or of the subset 'revs'.

    Without 'revs', the index's own ``headrevs()`` is used, falling back
    to ``self._headrevs()`` when that raises AttributeError. With 'revs',
    the computation is handed to ``rustdagop`` when it is available and
    to ``dagop`` otherwise.
    """
    if revs is not None:
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)
    try:
        return self.index.headrevs()
    except AttributeError:
        # The index offers no headrevs(); use the Python implementation.
        return self._headrevs()
1224
1231
def computephases(self, roots):
    """Return what the index's ``computephasesmapsets`` yields for 'roots'."""
    index = self.index
    return index.computephasesmapsets(roots)
1227
1234
def _headrevs(self):
    """Compute the head revisions by scanning the index in Python.

    Returns ``[nullrev]`` when the revlog is empty, otherwise the list of
    iterated revisions that no iterated revision names as a parent.
    """
    total = len(self)
    if total == 0:
        return [nullrev]
    # Revisions skipped by iteration (filtered) are never flagged, so
    # nothing is a head to begin with. The table has one spare slot at
    # the end -- presumably so that a nullrev (-1) parent lands there.
    flags = [0] * (total + 1)
    entries = self.index
    for rev in self:
        # Candidate head until a later revision names it as a parent.
        flags[rev] = 1
        entry = entries[rev]
        # Fields 5 and 6 of an index entry are the parent revisions.
        flags[entry[5]] = 0
        flags[entry[6]] = 0
    return [rev for rev, flag in enumerate(flags) if flag]
1240
1247
def heads(self, start=None, stop=None):
    """Return the list of nodes that have no children.

    When 'start' is given, only heads descending from 'start' are
    returned. When 'stop' is given, the revisions it lists are treated
    as if they had no children.
    """
    if start is None and stop is None:
        # Unrestricted query: answer straight from headrevs().
        if len(self) == 0:
            return [self.nullid]
        return [self.node(r) for r in self.headrevs()]

    startrev = nullrev if start is None else self.rev(start)
    stoprevs = {self.rev(n) for n in stop or []}

    headrevs = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
    )
    return [self.node(rev) for rev in headrevs]
1266
1273
def children(self, node):
    """Return the nodes whose parents include 'node'.

    When 'node' resolves to nullrev, the revisions without any non-null
    parent are returned.
    """
    found = []
    target = self.rev(node)
    # Only revisions after 'target' are scanned for children.
    for r in self.revs(start=target + 1):
        realparents = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if not realparents:
            # A parentless revision is a child of the null revision.
            if target == nullrev:
                found.append(self.node(r))
            continue
        for pr in realparents:
            if pr == target:
                found.append(self.node(r))
    return found
1280
1287
def commonancestorsheads(self, a, b):
    """Return the heads of the common ancestors of nodes 'a' and 'b'.

    Both nodes are converted to revisions, the revision-level helper does
    the work, and the result is mapped back to a list of nodes.
    """
    reva = self.rev(a)
    revb = self.rev(b)
    headrevs = self._commonancestorsheads(reva, revb)
    return pycompat.maplist(self.node, headrevs)
1286
1293
def _commonancestorsheads(self, *revs):
    """Return the heads of the common ancestors of the given revisions.

    The index implementation is tried first; if it raises AttributeError
    or OverflowError, the ``ancestor`` module computes the result from
    ``self.parentrevs`` instead.
    """
    try:
        return self.index.commonancestorsheads(*revs)
    except (AttributeError, OverflowError):
        # The C implementation is missing or failed.
        return ancestor.commonancestorsheads(self.parentrevs, *revs)
1294
1301
def isancestor(self, a, b):
    """Tell whether node 'a' is an ancestor of node 'b'.

    A revision counts as an ancestor of itself. The nodes are resolved to
    revisions and the question is passed on to ``isancestorrev``.
    """
    reva = self.rev(a)
    revb = self.rev(b)
    return self.isancestorrev(reva, revb)
1301
1308
def isancestorrev(self, a, b):
    """Tell whether revision 'a' is an ancestor of revision 'b'.

    A revision counts as an ancestor of itself.

    The logic here is trivial; the non-trivial part lives in
    ``reachableroots``.
    """
    # The null revision and the revision itself are always ancestors.
    if a == nullrev or a == b:
        return True
    # An ancestor never has a higher revision number than its descendant.
    if a > b:
        return False
    return bool(self.reachableroots(a, [b], [a], includepath=False))
1316
1323
def reachableroots(self, minroot, heads, roots, includepath=False):
    """Return (heads(::(<roots> and <roots>::<heads>))).

    With 'includepath' set to True, return (<roots>::<heads>) instead.

    The index's ``reachableroots2`` is used when it exists; an
    AttributeError switches to the pure implementation in ``dagop``,
    which takes 'roots' and 'heads' in the opposite order.
    """
    try:
        found = self.index.reachableroots2(minroot, heads, roots, includepath)
    except AttributeError:
        found = dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )
    return found
1329
1336
def ancestor(self, a, b):
    """Return the "best" common ancestor of nodes 'a' and 'b'.

    Returns nullid when no common ancestor is reported.
    """
    reva, revb = self.rev(a), self.rev(b)
    try:
        candidates = self.index.ancestors(reva, revb)
    except (AttributeError, OverflowError):
        # Index implementation missing or failed: use the ancestor module.
        candidates = ancestor.ancestors(self.parentrevs, reva, revb)
    if not candidates:
        return self.nullid
    # Break ties deterministically by taking the smallest node.
    return min(self.node(r) for r in candidates)
1342
1349
def _match(self, id):
    """Resolve 'id' to a node through exact matching, or return None.

    Tried in order: an integer revision, a binary node, the decimal
    string form of a revision (negative values count from the end), and
    a full-length hex node.
    """
    if isinstance(id, int):
        # Plain revision number.
        return self.node(id)

    if len(id) == self.nodeconstants.nodelen:
        # Could be a binary node. The odds of a binary node being all
        # hex in ASCII are 1 in 10**25.
        try:
            self.rev(id)  # quick search of the index
            return id
        except error.LookupError:
            pass  # may still be a partial hex id

    try:
        # str(rev)
        rev = int(id)
        # Reject spellings that are not the canonical decimal form.
        if b"%d" % rev != id:
            raise ValueError
        if rev < 0:
            rev = len(self) + rev
        if not (0 <= rev < len(self)):
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass

    if len(id) == 2 * self.nodeconstants.nodelen:
        # Could be a full hex nodeid.
        try:
            node = bin(id)
            self.rev(node)
            return node
        except (TypeError, error.LookupError):
            pass

    return None
1376
1383
def _partialmatch(self, id):
    """Resolve the hex prefix 'id' to a node.

    Returns the matching node, or None when nothing matches. Raises
    AmbiguousPrefixLookupError when the prefix is ambiguous, and
    WdirUnsupported when the prefix matches no stored node but is a
    prefix of the working-directory hex id.
    """
    # wdirfilenodeids are ignored here as they should always be full
    # hashes.
    maybewdir = self.nodeconstants.wdirhex.startswith(id)
    try:
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                raise error.RevlogError
            return partial
        if maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        return None
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            raise error.AmbiguousPrefixLookupError(
                id, self._indexfile, _(b'ambiguous identifier')
            )
        # otherwise continue with the slow path, which filters hidden
        # revisions
    except (AttributeError, ValueError):
        # pure Python index, or key too short to search the radix tree
        pass

    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            # Only an even number of hex digits converts to binary.
            evenlen = (len(id) // 2) * 2
            prefix = bin(id[:evenlen])
            candidates = [
                e[7] for e in self.index if e[7].startswith(prefix)
            ]
            # Re-check against the full (possibly odd-length) prefix and
            # drop nodes this revlog does not expose.
            candidates = [
                n
                for n in candidates
                if hex(n).startswith(id) and self.hasnode(n)
            ]
            if self.nodeconstants.nullhex.startswith(id):
                candidates.append(self.nullid)
            if candidates:
                if len(candidates) == 1 and not maybewdir:
                    self._pcache[id] = candidates[0]
                    return candidates[0]
                raise error.AmbiguousPrefixLookupError(
                    id, self._indexfile, _(b'ambiguous identifier')
                )
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            pass
1429
1436
def lookup(self, id):
    """Find the node designated by 'id', which may be:

    - a revision number or str(revision number)
    - a nodeid or a prefix of a hex nodeid

    Raises LookupError when nothing matches.
    """
    exact = self._match(id)
    if exact is not None:
        return exact
    partial = self._partialmatch(id)
    if not partial:
        raise error.LookupError(id, self._indexfile, _(b'no match found'))
    return partial
1443
1450
def shortest(self, node, minlength=1):
    """Return the shortest unambiguous hex prefix identifying 'node'.

    The prefix has at least 'minlength' digits. Raises LookupError when
    'node' is not found.
    """
    hexnode = hex(node)

    def allf(prefix):
        # True when the prefix could also designate the wdir id.
        return all(c == b'f' for c in pycompat.iterbytestr(prefix))

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        # Grow the prefix until it is no longer made of 'f' only; falls
        # through (returning None) if no such prefix exists.
        for length in range(minlength, len(hexnode) + 1):
            candidate = hexnode[:length]
            if not allf(candidate):
                return candidate

    def isvalid(prefix):
        # A prefix is valid when it is not ambiguous; an unknown node is
        # an error rather than an invalid prefix.
        try:
            matchednode = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if matchednode is None:
            raise error.LookupError(node, self._indexfile, _(b'no node'))
        return True

    if not getattr(self, 'filteredrevs', None):
        # Nothing is filtered: let the index answer when it can.
        try:
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except error.RevlogError:
            if node != self.nodeconstants.wdirid:
                raise error.LookupError(
                    node, self._indexfile, _(b'no node')
                )
        except AttributeError:
            # Continue with the pure code below.
            pass

    lengths = range(minlength, len(hexnode) + 1)

    if node == self.nodeconstants.wdirid:
        for length in lengths:
            candidate = hexnode[:length]
            if isvalid(candidate):
                return candidate

    for length in lengths:
        candidate = hexnode[:length]
        if isvalid(candidate):
            return disambiguate(hexnode, length)
1494
1501
def cmp(self, node, text):
    """Compare 'text' with the stored file revision 'node'.

    Returns True when 'text' differs from what is stored. The check
    hashes 'text' together with the parents of 'node' and compares the
    result with 'node'.
    """
    parent1, parent2 = self.parents(node)
    computed = storageutil.hashrevisionsha1(text, parent1, parent2)
    return computed != node
1502
1509
def _cachesegment(self, offset, data):
    """Record a segment in the revlog chunk cache.

    'offset' is the absolute offset of 'data'. The segment is appended to
    the cached data when it starts exactly where the cache ends and the
    combined size stays below ``_chunksize``; otherwise it replaces the
    cache.
    """
    cachedoffset, cacheddata = self._chunkcache
    contiguous = cachedoffset + len(cacheddata) == offset
    if contiguous and len(cacheddata) + len(data) < _chunksize:
        # Extend the existing cache.
        self._chunkcache = cachedoffset, cacheddata + data
    else:
        self._chunkcache = offset, data
1514
1521
def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, a length to read, and an optional
    existing file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    Returns a str or buffer of raw byte data.

    Raises RevlogError if the requested number of bytes could not be
    read.
    """
    # Cache data both forward and backward around the requested data, in
    # a fixed size window. This helps speed up operations involving
    # reading the revlog backwards.
    # NOTE(review): the mask arithmetic assumes _chunkcachesize is a
    # power of two -- confirm where it is set.
    cachesize = self._chunkcachesize
    realoffset = offset & ~(cachesize - 1)
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        d = df.read(reallength)

    self._cachesegment(realoffset, d)
    if offset != realoffset or reallength != length:
        # The read window is wider than the request: hand out a view on
        # the requested part only.
        startoffset = offset - realoffset
        available = len(d) - startoffset
        if available < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from '
                    b'offset %d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    # Report the offset the 'length' bytes were expected
                    # from, not the aligned start of the read window.
                    offset,
                    # The read may end before 'offset'; never report a
                    # negative byte count.
                    max(available, 0),
                )
            )

        return util.buffer(d, startoffset, length)

    if len(d) < length:
        raise error.RevlogError(
            _(
                b'partial read of revlog %s; expected %d bytes from offset '
                b'%d, got %d'
            )
            % (
                self._indexfile if self._inline else self._datafile,
                length,
                offset,
                len(d),
            )
        )

    return d
1574
1581
1575 def _getsegment(self, offset, length, df=None):
1582 def _getsegment(self, offset, length, df=None):
1576 """Obtain a segment of raw data from the revlog.
1583 """Obtain a segment of raw data from the revlog.
1577
1584
1578 Accepts an absolute offset, length of bytes to obtain, and an
1585 Accepts an absolute offset, length of bytes to obtain, and an
1579 optional file handle to the already-opened revlog. If the file
1586 optional file handle to the already-opened revlog. If the file
1580 handle is used, it's original seek position will not be preserved.
1587 handle is used, it's original seek position will not be preserved.
1581
1588
1582 Requests for data may be returned from a cache.
1589 Requests for data may be returned from a cache.
1583
1590
1584 Returns a str or a buffer instance of raw byte data.
1591 Returns a str or a buffer instance of raw byte data.
1585 """
1592 """
1586 o, d = self._chunkcache
1593 o, d = self._chunkcache
1587 l = len(d)
1594 l = len(d)
1588
1595
1589 # is it in the cache?
1596 # is it in the cache?
1590 cachestart = offset - o
1597 cachestart = offset - o
1591 cacheend = cachestart + length
1598 cacheend = cachestart + length
1592 if cachestart >= 0 and cacheend <= l:
1599 if cachestart >= 0 and cacheend <= l:
1593 if cachestart == 0 and cacheend == l:
1600 if cachestart == 0 and cacheend == l:
1594 return d # avoid a copy
1601 return d # avoid a copy
1595 return util.buffer(d, cachestart, cacheend - cachestart)
1602 return util.buffer(d, cachestart, cacheend - cachestart)
1596
1603
1597 return self._readsegment(offset, length, df=df)
1604 return self._readsegment(offset, length, df=df)
1598
1605
1599 def _getsegmentforrevs(self, startrev, endrev, df=None):
1606 def _getsegmentforrevs(self, startrev, endrev, df=None):
1600 """Obtain a segment of raw data corresponding to a range of revisions.
1607 """Obtain a segment of raw data corresponding to a range of revisions.
1601
1608
1602 Accepts the start and end revisions and an optional already-open
1609 Accepts the start and end revisions and an optional already-open
1603 file handle to be used for reading. If the file handle is read, its
1610 file handle to be used for reading. If the file handle is read, its
1604 seek position will not be preserved.
1611 seek position will not be preserved.
1605
1612
1606 Requests for data may be satisfied by a cache.
1613 Requests for data may be satisfied by a cache.
1607
1614
1608 Returns a 2-tuple of (offset, data) for the requested range of
1615 Returns a 2-tuple of (offset, data) for the requested range of
1609 revisions. Offset is the integer offset from the beginning of the
1616 revisions. Offset is the integer offset from the beginning of the
1610 revlog and data is a str or buffer of the raw byte data.
1617 revlog and data is a str or buffer of the raw byte data.
1611
1618
1612 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1619 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1613 to determine where each revision's data begins and ends.
1620 to determine where each revision's data begins and ends.
1614 """
1621 """
1615 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1622 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1616 # (functions are expensive).
1623 # (functions are expensive).
1617 index = self.index
1624 index = self.index
1618 istart = index[startrev]
1625 istart = index[startrev]
1619 start = int(istart[0] >> 16)
1626 start = int(istart[0] >> 16)
1620 if startrev == endrev:
1627 if startrev == endrev:
1621 end = start + istart[1]
1628 end = start + istart[1]
1622 else:
1629 else:
1623 iend = index[endrev]
1630 iend = index[endrev]
1624 end = int(iend[0] >> 16) + iend[1]
1631 end = int(iend[0] >> 16) + iend[1]
1625
1632
1626 if self._inline:
1633 if self._inline:
1627 start += (startrev + 1) * self.index.entry_size
1634 start += (startrev + 1) * self.index.entry_size
1628 end += (endrev + 1) * self.index.entry_size
1635 end += (endrev + 1) * self.index.entry_size
1629 length = end - start
1636 length = end - start
1630
1637
1631 return start, self._getsegment(start, length, df=df)
1638 return start, self._getsegment(start, length, df=df)
1632
1639
1633 def _chunk(self, rev, df=None):
1640 def _chunk(self, rev, df=None):
1634 """Obtain a single decompressed chunk for a revision.
1641 """Obtain a single decompressed chunk for a revision.
1635
1642
1636 Accepts an integer revision and an optional already-open file handle
1643 Accepts an integer revision and an optional already-open file handle
1637 to be used for reading. If used, the seek position of the file will not
1644 to be used for reading. If used, the seek position of the file will not
1638 be preserved.
1645 be preserved.
1639
1646
1640 Returns a str holding uncompressed data for the requested revision.
1647 Returns a str holding uncompressed data for the requested revision.
1641 """
1648 """
1642 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1649 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1643
1650
1644 def _chunks(self, revs, df=None, targetsize=None):
1651 def _chunks(self, revs, df=None, targetsize=None):
1645 """Obtain decompressed chunks for the specified revisions.
1652 """Obtain decompressed chunks for the specified revisions.
1646
1653
1647 Accepts an iterable of numeric revisions that are assumed to be in
1654 Accepts an iterable of numeric revisions that are assumed to be in
1648 ascending order. Also accepts an optional already-open file handle
1655 ascending order. Also accepts an optional already-open file handle
1649 to be used for reading. If used, the seek position of the file will
1656 to be used for reading. If used, the seek position of the file will
1650 not be preserved.
1657 not be preserved.
1651
1658
1652 This function is similar to calling ``self._chunk()`` multiple times,
1659 This function is similar to calling ``self._chunk()`` multiple times,
1653 but is faster.
1660 but is faster.
1654
1661
1655 Returns a list with decompressed data for each requested revision.
1662 Returns a list with decompressed data for each requested revision.
1656 """
1663 """
1657 if not revs:
1664 if not revs:
1658 return []
1665 return []
1659 start = self.start
1666 start = self.start
1660 length = self.length
1667 length = self.length
1661 inline = self._inline
1668 inline = self._inline
1662 iosize = self.index.entry_size
1669 iosize = self.index.entry_size
1663 buffer = util.buffer
1670 buffer = util.buffer
1664
1671
1665 l = []
1672 l = []
1666 ladd = l.append
1673 ladd = l.append
1667
1674
1668 if not self._withsparseread:
1675 if not self._withsparseread:
1669 slicedchunks = (revs,)
1676 slicedchunks = (revs,)
1670 else:
1677 else:
1671 slicedchunks = deltautil.slicechunk(
1678 slicedchunks = deltautil.slicechunk(
1672 self, revs, targetsize=targetsize
1679 self, revs, targetsize=targetsize
1673 )
1680 )
1674
1681
1675 for revschunk in slicedchunks:
1682 for revschunk in slicedchunks:
1676 firstrev = revschunk[0]
1683 firstrev = revschunk[0]
1677 # Skip trailing revisions with empty diff
1684 # Skip trailing revisions with empty diff
1678 for lastrev in revschunk[::-1]:
1685 for lastrev in revschunk[::-1]:
1679 if length(lastrev) != 0:
1686 if length(lastrev) != 0:
1680 break
1687 break
1681
1688
1682 try:
1689 try:
1683 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1690 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1684 except OverflowError:
1691 except OverflowError:
1685 # issue4215 - we can't cache a run of chunks greater than
1692 # issue4215 - we can't cache a run of chunks greater than
1686 # 2G on Windows
1693 # 2G on Windows
1687 return [self._chunk(rev, df=df) for rev in revschunk]
1694 return [self._chunk(rev, df=df) for rev in revschunk]
1688
1695
1689 decomp = self.decompress
1696 decomp = self.decompress
1690 for rev in revschunk:
1697 for rev in revschunk:
1691 chunkstart = start(rev)
1698 chunkstart = start(rev)
1692 if inline:
1699 if inline:
1693 chunkstart += (rev + 1) * iosize
1700 chunkstart += (rev + 1) * iosize
1694 chunklength = length(rev)
1701 chunklength = length(rev)
1695 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1702 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1696
1703
1697 return l
1704 return l
1698
1705
1699 def _chunkclear(self):
1706 def _chunkclear(self):
1700 """Clear the raw chunk cache."""
1707 """Clear the raw chunk cache."""
1701 self._chunkcache = (0, b'')
1708 self._chunkcache = (0, b'')
1702
1709
def deltaparent(self, rev):
    """Return the revision that ``rev`` is stored as a delta against.

    ``nullrev`` is returned when the entry's base is the revision itself.
    Otherwise the recorded base is returned for general-delta revlogs, and
    the previous revision for the others.
    """
    base = self.index[rev][3]
    if base == rev:
        return nullrev
    if self._generaldelta:
        return base
    return rev - 1
1712
1719
def issnapshot(self, rev):
    """Tell whether ``rev`` is a snapshot."""
    if not self._sparserevlog:
        # Without sparse-revlog only revisions without a delta parent
        # count as snapshots.
        return self.deltaparent(rev) == nullrev

    if util.safehasattr(self.index, b'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)

    if rev == nullrev:
        return True

    entry = self.index[rev]
    base = entry[3]
    if base == rev or base == nullrev:
        return True

    # A delta against one of the parents is a regular delta, not a
    # snapshot.
    if base in (entry[5], entry[6]):
        return False

    # Otherwise this is a snapshot only if its base is one as well.
    return self.issnapshot(base)
1734
1741
def snapshotdepth(self, rev):
    """Return the number of snapshots in the chain before this one.

    Raises ``error.ProgrammingError`` when ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # Interpolate the revision number; the message used to be raised
        # with a literal, unformatted ``%d``.
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    return len(self._deltachain(rev)[0]) - 1
1740
1747
def revdiff(self, rev1, rev2):
    """Return or calculate a delta between two revisions.

    The delta is in binary form and is intended to be written to revlog
    data directly, so this function works on raw revision data.
    """
    # When rev2 is already stored as a delta against rev1, the stored
    # chunk is the answer.
    stored = rev1 != nullrev and self.deltaparent(rev2) == rev1
    if not stored:
        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
    return bytes(self._chunk(rev2))
1751
1758
def _processflags(self, text, flags, operation, raw=False):
    """Deprecated entry point to access flag processors.

    Emits a deprecation warning, then dispatches to the specialized
    ``flagutil`` variant: the raw one when ``raw`` is set, the read one
    when ``operation`` is ``b'read'``, and the write one otherwise.
    """
    msg = b'_processflag(...) use the specialized variant'
    util.nouideprecwarn(msg, b'5.2', stacklevel=2)
    if raw:
        return text, flagutil.processflagsraw(self, text, flags)
    if operation == b'read':
        processor = flagutil.processflagsread
    else:  # write operation
        processor = flagutil.processflagswrite
    return processor(self, text, flags)
1762
1769
def revision(self, nodeorrev, _df=None, raw=False):
    """Return an uncompressed revision of a given node or revision number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands.
    """
    if raw:
        msg = (
            b'revlog.revision(..., raw=True) is deprecated, '
            b'use revlog.rawdata(...)'
        )
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
    # _revisiondata() hands back the text first and the sidedata second;
    # only the text is wanted here.
    data = self._revisiondata(nodeorrev, _df, raw=raw)
    return data[0]
1779
1786
def sidedata(self, nodeorrev, _df=None):
    """A map of extra data related to the changeset but not part of the hash.

    This function currently returns a dictionary. However, a more advanced
    mapping object will likely be used in the future for more
    efficient/lazy code.
    """
    # _revisiondata() hands back the text first and the sidedata second.
    data = self._revisiondata(nodeorrev, _df)
    return data[1]
1788
1795
1789 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1796 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1790 # deal with <nodeorrev> argument type
1797 # deal with <nodeorrev> argument type
1791 if isinstance(nodeorrev, int):
1798 if isinstance(nodeorrev, int):
1792 rev = nodeorrev
1799 rev = nodeorrev
1793 node = self.node(rev)
1800 node = self.node(rev)
1794 else:
1801 else:
1795 node = nodeorrev
1802 node = nodeorrev
1796 rev = None
1803 rev = None
1797
1804
1798 # fast path the special `nullid` rev
1805 # fast path the special `nullid` rev
1799 if node == self.nullid:
1806 if node == self.nullid:
1800 return b"", {}
1807 return b"", {}
1801
1808
1802 # ``rawtext`` is the text as stored inside the revlog. Might be the
1809 # ``rawtext`` is the text as stored inside the revlog. Might be the
1803 # revision or might need to be processed to retrieve the revision.
1810 # revision or might need to be processed to retrieve the revision.
1804 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1811 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1805
1812
1806 if self.hassidedata:
1813 if self.hassidedata:
1807 if rev is None:
1814 if rev is None:
1808 rev = self.rev(node)
1815 rev = self.rev(node)
1809 sidedata = self._sidedata(rev)
1816 sidedata = self._sidedata(rev)
1810 else:
1817 else:
1811 sidedata = {}
1818 sidedata = {}
1812
1819
1813 if raw and validated:
1820 if raw and validated:
1814 # if we don't want to process the raw text and that raw
1821 # if we don't want to process the raw text and that raw
1815 # text is cached, we can exit early.
1822 # text is cached, we can exit early.
1816 return rawtext, sidedata
1823 return rawtext, sidedata
1817 if rev is None:
1824 if rev is None:
1818 rev = self.rev(node)
1825 rev = self.rev(node)
1819 # the revlog's flag for this revision
1826 # the revlog's flag for this revision
1820 # (usually alter its state or content)
1827 # (usually alter its state or content)
1821 flags = self.flags(rev)
1828 flags = self.flags(rev)
1822
1829
1823 if validated and flags == REVIDX_DEFAULT_FLAGS:
1830 if validated and flags == REVIDX_DEFAULT_FLAGS:
1824 # no extra flags set, no flag processor runs, text = rawtext
1831 # no extra flags set, no flag processor runs, text = rawtext
1825 return rawtext, sidedata
1832 return rawtext, sidedata
1826
1833
1827 if raw:
1834 if raw:
1828 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1835 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1829 text = rawtext
1836 text = rawtext
1830 else:
1837 else:
1831 r = flagutil.processflagsread(self, rawtext, flags)
1838 r = flagutil.processflagsread(self, rawtext, flags)
1832 text, validatehash = r
1839 text, validatehash = r
1833 if validatehash:
1840 if validatehash:
1834 self.checkhash(text, node, rev=rev)
1841 self.checkhash(text, node, rev=rev)
1835 if not validated:
1842 if not validated:
1836 self._revisioncache = (node, rev, rawtext)
1843 self._revisioncache = (node, rev, rawtext)
1837
1844
1838 return text, sidedata
1845 return text, sidedata
1839
1846
1840 def _rawtext(self, node, rev, _df=None):
1847 def _rawtext(self, node, rev, _df=None):
1841 """return the possibly unvalidated rawtext for a revision
1848 """return the possibly unvalidated rawtext for a revision
1842
1849
1843 returns (rev, rawtext, validated)
1850 returns (rev, rawtext, validated)
1844 """
1851 """
1845
1852
1846 # revision in the cache (could be useful to apply delta)
1853 # revision in the cache (could be useful to apply delta)
1847 cachedrev = None
1854 cachedrev = None
1848 # An intermediate text to apply deltas to
1855 # An intermediate text to apply deltas to
1849 basetext = None
1856 basetext = None
1850
1857
1851 # Check if we have the entry in cache
1858 # Check if we have the entry in cache
1852 # The cache entry looks like (node, rev, rawtext)
1859 # The cache entry looks like (node, rev, rawtext)
1853 if self._revisioncache:
1860 if self._revisioncache:
1854 if self._revisioncache[0] == node:
1861 if self._revisioncache[0] == node:
1855 return (rev, self._revisioncache[2], True)
1862 return (rev, self._revisioncache[2], True)
1856 cachedrev = self._revisioncache[1]
1863 cachedrev = self._revisioncache[1]
1857
1864
1858 if rev is None:
1865 if rev is None:
1859 rev = self.rev(node)
1866 rev = self.rev(node)
1860
1867
1861 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1868 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1862 if stopped:
1869 if stopped:
1863 basetext = self._revisioncache[2]
1870 basetext = self._revisioncache[2]
1864
1871
1865 # drop cache to save memory, the caller is expected to
1872 # drop cache to save memory, the caller is expected to
1866 # update self._revisioncache after validating the text
1873 # update self._revisioncache after validating the text
1867 self._revisioncache = None
1874 self._revisioncache = None
1868
1875
1869 targetsize = None
1876 targetsize = None
1870 rawsize = self.index[rev][2]
1877 rawsize = self.index[rev][2]
1871 if 0 <= rawsize:
1878 if 0 <= rawsize:
1872 targetsize = 4 * rawsize
1879 targetsize = 4 * rawsize
1873
1880
1874 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1881 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1875 if basetext is None:
1882 if basetext is None:
1876 basetext = bytes(bins[0])
1883 basetext = bytes(bins[0])
1877 bins = bins[1:]
1884 bins = bins[1:]
1878
1885
1879 rawtext = mdiff.patches(basetext, bins)
1886 rawtext = mdiff.patches(basetext, bins)
1880 del basetext # let us have a chance to free memory early
1887 del basetext # let us have a chance to free memory early
1881 return (rev, rawtext, False)
1888 return (rev, rawtext, False)
1882
1889
1883 def _sidedata(self, rev):
1890 def _sidedata(self, rev):
1884 """Return the sidedata for a given revision number."""
1891 """Return the sidedata for a given revision number."""
1885 index_entry = self.index[rev]
1892 index_entry = self.index[rev]
1886 sidedata_offset = index_entry[8]
1893 sidedata_offset = index_entry[8]
1887 sidedata_size = index_entry[9]
1894 sidedata_size = index_entry[9]
1888
1895
1889 if self._inline:
1896 if self._inline:
1890 sidedata_offset += self.index.entry_size * (1 + rev)
1897 sidedata_offset += self.index.entry_size * (1 + rev)
1891 if sidedata_size == 0:
1898 if sidedata_size == 0:
1892 return {}
1899 return {}
1893
1900
1894 segment = self._getsegment(sidedata_offset, sidedata_size)
1901 segment = self._getsegment(sidedata_offset, sidedata_size)
1895 sidedata = sidedatautil.deserialize_sidedata(segment)
1902 sidedata = sidedatautil.deserialize_sidedata(segment)
1896 return sidedata
1903 return sidedata
1897
1904
def rawdata(self, nodeorrev, _df=None):
    """Return the uncompressed raw data of a given node or revision number.

    _df - an existing file handle to read from. (internal-only)
    """
    # _revisiondata() hands back the text first and the sidedata second.
    data = self._revisiondata(nodeorrev, _df, raw=True)
    return data[0]
1904
1911
def hash(self, text, p1, p2):
    """Compute the node hash of ``text`` with parents ``p1`` and ``p2``.

    Kept as a method so that subclasses can replace the hash as needed.
    """
    node = storageutil.hashrevisionsha1(text, p1, p2)
    return node
1912
1919
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check node hash integrity.

    The parents are looked up from ``node`` when neither ``p1`` nor ``p2``
    is given. ``rev`` is only used to build the error message.

    Raises ``error.RevlogError`` on a mismatch, or
    ``error.CensoredNodeError`` when the revlog is censorable and ``text``
    is a censored text.

    Available as a function so that subclasses can extend hash mismatch
    behaviors as needed.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node == self.hash(text, p1, p2):
            return

        # Clear the revision cache on hash failure. The revision cache
        # only stores the raw revision and clearing the cache does have
        # the side-effect that we won't have a cache hit when the raw
        # revision data is accessed. But this case should be rare and
        # it is extra work to teach the cache about the hash
        # verification state.
        cache = self._revisioncache
        if cache and cache[0] == node:
            self._revisioncache = None

        # Prefer the revision number in the message, falling back to the
        # short hex form of the node.
        if rev is None:
            revornode = templatefilters.short(hex(node))
        else:
            revornode = rev
        raise error.RevlogError(
            _(b"integrity check failed on %s:%s")
            % (self._indexfile, pycompat.bytestr(revornode))
        )
    except error.RevlogError:
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self._indexfile, node, text)
        raise
1943
1950
1944 def _enforceinlinesize(self, tr, fp=None):
1951 def _enforceinlinesize(self, tr, fp=None):
1945 """Check if the revlog is too big for inline and convert if so.
1952 """Check if the revlog is too big for inline and convert if so.
1946
1953
1947 This should be called after revisions are added to the revlog. If the
1954 This should be called after revisions are added to the revlog. If the
1948 revlog has grown too large to be an inline revlog, it will convert it
1955 revlog has grown too large to be an inline revlog, it will convert it
1949 to use multiple index and data files.
1956 to use multiple index and data files.
1950 """
1957 """
1951 tiprev = len(self) - 1
1958 tiprev = len(self) - 1
1952 if (
1959 if (
1953 not self._inline
1960 not self._inline
1954 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1961 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1955 ):
1962 ):
1956 return
1963 return
1957
1964
1958 troffset = tr.findoffset(self._indexfile)
1965 troffset = tr.findoffset(self._indexfile)
1959 if troffset is None:
1966 if troffset is None:
1960 raise error.RevlogError(
1967 raise error.RevlogError(
1961 _(b"%s not found in the transaction") % self._indexfile
1968 _(b"%s not found in the transaction") % self._indexfile
1962 )
1969 )
1963 trindex = 0
1970 trindex = 0
1964 tr.add(self._datafile, 0)
1971 tr.add(self._datafile, 0)
1965
1972
1966 if fp:
1973 if fp:
1967 fp.flush()
1974 fp.flush()
1968 fp.close()
1975 fp.close()
1969 # We can't use the cached file handle after close(). So prevent
1976 # We can't use the cached file handle after close(). So prevent
1970 # its usage.
1977 # its usage.
1971 self._writinghandles = None
1978 self._writinghandles = None
1972
1979
1973 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1980 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1974 for r in self:
1981 for r in self:
1975 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1982 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1976 if troffset <= self.start(r):
1983 if troffset <= self.start(r):
1977 trindex = r
1984 trindex = r
1978
1985
1979 with self._indexfp(b'w') as fp:
1986 with self._indexfp(b'w') as fp:
1980 self._format_flags &= ~FLAG_INLINE_DATA
1987 self._format_flags &= ~FLAG_INLINE_DATA
1981 self._inline = False
1988 self._inline = False
1982 for i in self:
1989 for i in self:
1983 e = self.index.entry_binary(i)
1990 e = self.index.entry_binary(i)
1984 if i == 0:
1991 if i == 0:
1985 header = self._format_flags | self._format_version
1992 header = self._format_flags | self._format_version
1986 header = self.index.pack_header(header)
1993 header = self.index.pack_header(header)
1987 e = header + e
1994 e = header + e
1988 fp.write(e)
1995 fp.write(e)
1989
1996
1990 # the temp file replace the real index when we exit the context
1997 # the temp file replace the real index when we exit the context
1991 # manager
1998 # manager
1992
1999
1993 tr.replace(self._indexfile, trindex * self.index.entry_size)
2000 tr.replace(self._indexfile, trindex * self.index.entry_size)
1994 nodemaputil.setup_persistent_nodemap(tr, self)
2001 nodemaputil.setup_persistent_nodemap(tr, self)
1995 self._chunkclear()
2002 self._chunkclear()
1996
2003
1997 def _nodeduplicatecallback(self, transaction, node):
2004 def _nodeduplicatecallback(self, transaction, node):
1998 """called when trying to add a node already stored."""
2005 """called when trying to add a node already stored."""
1999
2006
def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """Add a revision to the log and return its revision number.

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - an optional mapping of sidedata to store with the revision;
        only accepted when the revlog has sidedata support

    If the node is already present in the index, the existing revision
    number is returned and nothing is added.
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self._indexfile
        )

    if sidedata is None:
        sidedata = {}
    elif sidedata and not self.hassidedata:
        raise error.ProgrammingError(
            _(b"trying to add sidedata to a revlog who don't support them")
        )

    if flags and not node:
        # With flags set, the node is computed from the text as given,
        # before the write flag processors run.
        node = self.hash(text, p1, p2)

    rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    rawsize = len(rawtext)
    if rawsize > _maxentrysize:
        raise error.RevlogError(
            _(
                b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
            )
            % (self._indexfile, rawsize)
        )

    if not node:
        node = self.hash(rawtext, p1, p2)

    existing = self.index.get_rev(node)
    if existing is not None:
        return existing

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
        sidedata=sidedata,
    )
2077
2084
def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
    sidedata=None,
):
    """add a raw revision with known flags, node and parents

    useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).

    The index handle (and, for non-inline revlogs, the data handle) is
    opened in ``a+`` mode for the duration of the call and handed to
    ``_addrevision()``, whose return value is returned unchanged.

    Both handles are closed before returning, whether ``_addrevision()``
    succeeds or raises. The data handle is also released when opening the
    index handle fails, and the index handle is still closed when closing
    the data handle raises.
    """
    dfh = None
    ifh = None
    if not self._inline:
        dfh = self._datafp(b"a+")
    try:
        # Opened inside the ``try`` so that a failure here cannot leak an
        # already opened data handle.
        ifh = self._indexfp(b"a+")
        return self._addrevision(
            node,
            rawtext,
            transaction,
            link,
            p1,
            p2,
            flags,
            cachedelta,
            ifh,
            dfh,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
    finally:
        # Close order is data first, then index. The nested ``finally``
        # guarantees the index handle is closed even if closing the data
        # handle raises.
        try:
            if dfh:
                dfh.close()
        finally:
            if ifh is not None:
                ifh.close()
2118
2125
def compress(self, data):
    """Turn ``data`` into a ``(header, payload)`` pair for storage.

    - empty input is returned untouched with an empty header;
    - when the configured compressor returns something truthy, that
      result is used with an empty header (the compressor already put its
      own header inside the returned bytes);
    - otherwise the data is kept uncompressed: as-is when it starts with
      a NUL byte, or behind a ``b'u'`` header in every other case.
    """
    no_header = b''
    if data:
        packed = self._compressor.compress(data)
        if packed:
            # The revlog compressor added the header in the returned data.
            return no_header, packed
        if data[0:1] != b'\0':
            # Raw data not starting with NUL needs an explicit marker.
            return b'u', data
    return no_header, data
2133
2140
def decompress(self, data):
    """Decompress a revlog chunk.

    The first byte of a non-empty chunk identifies how it was stored and
    selects the routine used to decode it. Empty chunks are returned
    unchanged.

    Raises ``error.RevlogError`` when zlib fails on a ``b'x'`` chunk or
    when no compression engine is registered for the header byte.
    """
    if not data:
        return data

    # Revlogs are read far more often than they are written, and many
    # chunks decompress in microseconds, so the dispatch below is tuned for
    # speed. The assumptions are:
    #
    # 1) most chunks are compressed (as opposed to inline raw data);
    # 2) decompressing *any* data will likely be at least 10x slower than
    #    returning raw inline data;
    # 3) common and officially supported compression engines deserve
    #    priority.
    #
    # Hence the hard-coded header checks come first and the generic
    # compression engine lookup comes last.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    header = data[0:1]

    if header == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    if header == b'\0':
        return data
    if header == b'u':
        # Drop the one-byte marker without touching the payload.
        return util.buffer(data, 1)

    try:
        engine_decompressor = self._decompressors[header]
    except KeyError:
        # First chunk seen with this header: resolve the engine and
        # remember its decompressor for later calls.
        try:
            engine = util.compengines.forrevlogheader(header)
            engine_decompressor = engine.revlogcompressor(
                self._compengineopts
            )
            self._decompressors[header] = engine_decompressor
        except KeyError:
            raise error.RevlogError(
                _(b'unknown compression type %s') % binascii.hexlify(header)
            )

    return engine_decompressor.decompress(data)
2193
2200
def _addrevision(
    self,
    node,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    flags,
    cachedelta,
    ifh,
    dfh,
    alwayscache=False,
    deltacomputer=None,
    sidedata=None,
):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    ``ifh`` and ``dfh`` are the already opened index and data file
    handles. When ``alwayscache`` is set and no raw text was supplied, the
    text is rebuilt so that it can be stored in the revision cache.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    Returns the revision number given to the new entry.

    Raises ``error.RevlogError`` when ``node`` is the null id or one of the
    working-directory pseudo ids.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.
    """
    if node == self.nullid:
        raise error.RevlogError(
            _(b"%s: attempt to add null revision") % self._indexfile
        )
    wdir_ids = self.nodeconstants
    if node == wdir_ids.wdirid or node in wdir_ids.wdirfilenodeids:
        raise error.RevlogError(
            _(b"%s: attempt to add wdir revision") % self._indexfile
        )

    # Handle given to the delta computer: revision data lives in the index
    # file for inline revlogs and in the data file otherwise.
    fh = ifh if self._inline else dfh

    # One-element list handed to ``_revisioninfo`` and read back below.
    btext = [rawtext]

    newrev = len(self)
    offset = self._get_data_offset(newrev - 1)

    checker = self._concurrencychecker
    if checker:
        index_bytes = newrev * self.index.entry_size
        if self._inline:
            # offset is "as if" it were in the .d file, so we need to add on
            # the size of the entry metadata.
            checker(ifh, self._indexfile, offset + index_bytes)
        else:
            # Entries in the .i are a consistent size.
            checker(ifh, self._indexfile, index_bytes)
            checker(dfh, self._datafile, offset)

    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is not None:
        textlen = len(rawtext)
    else:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(
            revlog.size(self, cachedelta[0]), cachedelta[1]
        )

    if deltacomputer is None:
        deltacomputer = deltautil.deltacomputer(self)

    revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

    deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

    # Don't store the offset if the sidedata is empty, that way
    # we can easily detect empty sidedata and they will be no different
    # than ones we manually add.
    serialized_sidedata = b""
    sidedata_offset = 0
    if sidedata and self.hassidedata:
        serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
        # Sidedata is written directly after the revision's delta.
        sidedata_offset = offset + deltainfo.deltalen

    index_entry = (
        offset_type(offset, flags),
        deltainfo.deltalen,
        textlen,
        deltainfo.base,
        link,
        p1r,
        p2r,
        node,
        sidedata_offset,
        len(serialized_sidedata),
    )

    self.index.append(index_entry)
    packed_entry = self.index.entry_binary(newrev)
    if newrev == 0:
        # The very first entry is written together with the revlog header.
        header = self._format_flags | self._format_version
        header = self.index.pack_header(header)
        packed_entry = header + packed_entry
    self._writeentry(
        transaction,
        ifh,
        dfh,
        packed_entry,
        deltainfo.data,
        link,
        offset,
        serialized_sidedata,
    )

    rawtext = btext[0]

    if alwayscache and rawtext is None:
        rawtext = deltacomputer.buildtext(revinfo, fh)

    if type(rawtext) == bytes:  # only accept immutable objects
        self._revisioncache = (node, newrev, rawtext)
    self._chainbasecache[newrev] = deltainfo.chainbase
    return newrev
2331
2338
def _get_data_offset(self, prev):
    """Returns the current offset in the (in-transaction) data file.

    Versions < 2 of the revlog can get this in O(1) from the end of
    revision ``prev``. Revlog v2 needs a docket file to store that
    information: since sidedata can be rewritten to the end of the data
    file within a transaction, you can have cases where, for example, rev
    `n` does not have sidedata while rev `n - 1` does, leading to
    `n - 1`'s sidedata being written after `n`'s data. Until then every
    index entry is scanned for v2 revlogs and ``prev`` is not used.

    TODO cache this in a docket file before getting out of experimental."""
    if self._format_version == REVLOGV2:
        high_water = 0
        for rev, entry in enumerate(self.index):
            # entry[8] / entry[9]: sidedata offset and sidedata length.
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            high_water = max(self.end(rev), high_water, sidedata_end)
        return high_water

    return self.end(prev)
2351
2358
def _writeentry(
    self, transaction, ifh, dfh, entry, data, link, offset, sidedata
):
    """Append one revision to the revlog files.

    ``entry`` is the packed index entry, ``data`` a two-item sequence
    (header, payload) holding the revision chunk, ``sidedata`` the already
    serialized sidedata (possibly empty) and ``offset`` the position of the
    chunk in the data stream. The pre-write file sizes are registered with
    ``transaction`` before anything is written. ``link`` is accepted but
    not used here.
    """
    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms, Python or the platform itself can be buggy. Some versions
    # of Solaris have been observed to not append at the end of the file
    # if the file was seeked to before the end. See issue4943 for more.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3. However, because
    # the file handle is reused for reads and may be seeked there, we need
    # to be careful before changing this.
    ifh.seek(0, os.SEEK_END)
    if dfh:
        dfh.seek(0, os.SEEK_END)

    # The index already contains the entry being written.
    lastrev = len(self) - 1
    if self._inline:
        # Index entries and revision data are interleaved in one file.
        offset += lastrev * self.index.entry_size
        transaction.add(self._indexfile, offset)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        if sidedata:
            ifh.write(sidedata)
        self._enforceinlinesize(transaction, ifh)
    else:
        transaction.add(self._datafile, offset)
        transaction.add(self._indexfile, lastrev * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        if sidedata:
            dfh.write(sidedata)
        ifh.write(entry)
    nodemaputil.setup_persistent_nodemap(transaction, self)
2391
2398
def addgroup(
    self,
    deltas,
    linkmapper,
    transaction,
    alwayscache=False,
    addrevisioncb=None,
    duplicaterevisioncb=None,
):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    Each item of ``deltas`` is an 8-tuple ``(node, p1, p2, linknode,
    deltabase, delta, flags, sidedata)``; ``linkmapper`` turns ``linknode``
    into the linkrev to store.

    If ``addrevisioncb`` is defined, it will be called with this revlog
    and the revision number that was just added. If
    ``duplicaterevisioncb`` is defined, it will be called with this revlog
    and the revision number of a node that was already stored.

    Returns True when at least one delta was processed (added or found to
    be a duplicate), False otherwise.
    """

    if self._writinghandles:
        raise error.ProgrammingError(b'cannot nest addgroup() calls')

    oldcount = len(self)
    dataend = self.end(oldcount - 1) if oldcount else 0
    ifh = self._indexfp(b"a+")
    indexsize = oldcount * self.index.entry_size
    if self._inline:
        transaction.add(self._indexfile, dataend + indexsize)
        dfh = None
    else:
        transaction.add(self._indexfile, indexsize)
        transaction.add(self._datafile, dataend)
        dfh = self._datafp(b"a+")

    def flush():
        # Looks up the handles at call time, so it keeps working after
        # they are reopened further down.
        if dfh:
            dfh.flush()
        ifh.flush()

    self._writinghandles = (ifh, dfh)
    processed_any = False

    try:
        deltacomputer = deltautil.deltacomputer(self)
        # loop through our set of deltas
        for (
            node,
            p1,
            p2,
            linknode,
            deltabase,
            delta,
            flags,
            sidedata,
        ) in deltas:
            link = linkmapper(linknode)
            flags = flags or REVIDX_DEFAULT_FLAGS

            rev = self.index.get_rev(node)
            if rev is not None:
                # this can happen if two branches make the same change
                self._nodeduplicatecallback(transaction, rev)
                if duplicaterevisioncb:
                    duplicaterevisioncb(self, rev)
                processed_any = True
                continue

            for parent in (p1, p2):
                if not self.index.has_node(parent):
                    raise error.LookupError(
                        parent, self._indexfile, _(b'unknown parent')
                    )

            if not self.index.has_node(deltabase):
                raise error.LookupError(
                    deltabase, self._indexfile, _(b'unknown delta base')
                )

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # if base is censored, delta must be full replacement in a
                # single patch operation
                hlen = struct.calcsize(b">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                expected = mdiff.replacediffheader(oldlen, newlen)
                if delta[:hlen] != expected:
                    raise error.CensoredBaseError(
                        self._indexfile, self.node(baserev)
                    )

            if not flags and self._peek_iscensored(baserev, delta, flush):
                flags |= REVIDX_ISCENSORED

            # We assume consumers of addrevisioncb will want to retrieve
            # the added revision, which will require a call to
            # revision(). revision() will fast path if there is a cache
            # hit. So, we tell _addrevision() to always cache in this case.
            # We're only using addgroup() in the context of changegroup
            # generation so the revision data can always be handled as raw
            # by the flagprocessor.
            rev = self._addrevision(
                node,
                None,
                transaction,
                link,
                p1,
                p2,
                flags,
                (baserev, delta),
                ifh,
                dfh,
                alwayscache=alwayscache,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

            if addrevisioncb:
                addrevisioncb(self, rev)
            processed_any = True

            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self._datafp(b"a+")
                ifh = self._indexfp(b"a+")
                self._writinghandles = (ifh, dfh)
    finally:
        self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
    return processed_any
2522
2529
def iscensored(self, rev):
    """Tell whether revision ``rev`` carries the censored flag.

    Always ``False`` for a revlog that is not censorable; otherwise the
    result of masking the revision flags with ``REVIDX_ISCENSORED``
    (non-zero when the revision is censored).
    """
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED

    return False
2529
2536
def _peek_iscensored(self, baserev, delta, flush):
    """Cheaply test whether applying ``delta`` yields a censored revision.

    Always ``False`` for a revlog that is not censorable; otherwise the
    decision is delegated to ``storageutil.deltaiscensored``. ``flush`` is
    accepted but not used here.
    """
    if self._censorable:
        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    return False
2536
2543
def getstrippoint(self, minlink):
    """find the minimum rev that must be stripped to strip the linkrev

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.

    The computation is delegated to ``storageutil.resolvestripinfo``, fed
    with the tip revision, the head revisions and the linkrev/parentrevs
    accessors of this revlog.
    """
    tiprev = len(self) - 1
    heads = self.headrevs()
    return storageutil.resolvestripinfo(
        minlink, tiprev, heads, self.linkrev, self.parentrevs
    )
2550
2557
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.

    Nothing happens when the revlog is empty or when the strip point lies
    past the last revision.
    """
    if not len(self):
        return

    striprev, _brokenrevs = self.getstrippoint(minlink)
    if striprev == len(self):
        return

    # first truncate the files on disk
    dataend = self.start(striprev)
    index_bytes = striprev * self.index.entry_size
    if self._inline:
        # Entries and data share the index file: cut after both.
        indexend = dataend + index_bytes
    else:
        transaction.add(self._datafile, dataend)
        indexend = index_bytes

    transaction.add(self._indexfile, indexend)

    # then reset internal state in memory to forget those revisions
    self._revisioncache = None
    self._chaininfocache = util.lrucachedict(500)
    self._chunkclear()

    # NOTE(review): the ``-1`` slice end is kept exactly as found;
    # presumably the index type treats it as "through the last revision"
    # -- confirm against the index implementation.
    del self.index[striprev:-1]
2588
2595
def checksize(self):
    """Check size of index and data files

    return a (dd, di) tuple.
    - dd: extra bytes for the "data" file
    - di: extra bytes for the "index" file

    A healthy revlog will return (0, 0).

    A file that does not exist (``ENOENT``) is reported as having no extra
    bytes; any other ``IOError`` is propagated to the caller.
    """
    # Offset at which the data of the last revision is expected to end.
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        f = self.opener(self._indexfile)
        try:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        finally:
            # Release the handle even when seek()/tell() fails; the
            # previous code leaked it on that path.
            f.close()
        s = self.index.entry_size
        # Bytes left over once the file is cut into whole index entries.
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # Inline revlogs interleave revision data with index entries,
            # so the single file must account for both and there is no
            # separate data file to be off.
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2632
2639
def files(self):
    """Return the paths of the files backing this revlog.

    The index file is always listed; the data file is added only when
    the revlog is not inline.
    """
    if self._inline:
        return [self._indexfile]
    return [self._indexfile, self._datafile]
2638
2645
def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
):
    """Emit revision deltas for ``nodes`` via ``storageutil.emitrevisions``.

    ``nodesorder`` must be one of ``b'nodes'``, ``b'storage'``,
    ``b'linear'`` or ``None``; any other value raises
    ``error.ProgrammingError``. The remaining arguments are forwarded
    unchanged, apart from the two adjustments commented below.
    """
    accepted_orders = (b'nodes', b'storage', b'linear', None)
    if nodesorder not in accepted_orders:
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    # No explicit order requested: fall back to storage order for
    # revlogs that are not generaldelta.
    if nodesorder is None:
        if not self._generaldelta:
            nodesorder = b'storage'

    # When this revlog does not store delta chains, every mode other than
    # "delta against previous" is turned into full-revision emission.
    if not self._storedeltachains:
        if deltamode != repository.CG_DELTAMODE_PREV:
            deltamode = repository.CG_DELTAMODE_FULL

    options = {
        'deltaparentfn': self.deltaparent,
        'candeltafn': self.candelta,
        'rawsizefn': self.rawsize,
        'revdifffn': self.revdiff,
        'flagsfn': self.flags,
        'deltamode': deltamode,
        'revisiondata': revisiondata,
        'assumehaveparentrevisions': assumehaveparentrevisions,
        'sidedata_helpers': sidedata_helpers,
    }
    return storageutil.emitrevisions(
        self, nodes, nodesorder, revlogrevisiondelta, **options
    )
2677
2684
# Delta reuse policies accepted by ``clone()``; see its docstring for the
# meaning of each one.
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'

DELTAREUSEFULLADD = b'fulladd'

# Every valid policy; ``clone()`` rejects any value outside this set.
DELTAREUSEALL = {
    DELTAREUSEALWAYS,
    DELTAREUSESAMEREVS,
    DELTAREUSENEVER,
    DELTAREUSEFULLADD,
}
2685
2692
def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument controls how deltas from the existing
    revlog are preserved in the destination revlog. The argument can have
    the following values:

    DELTAREUSEALWAYS
       Deltas will always be reused (if possible), even if the destination
       revlog would not select the same revisions for the delta. This is the
       fastest mode of operation.
    DELTAREUSESAMEREVS
       Deltas will be reused if the destination revlog would pick the same
       revisions for the delta. This mode strikes a balance between speed
       and optimization.
    DELTAREUSENEVER
       Deltas will never be reused. This is the slowest mode of execution.
       This mode can be used to recompute deltas (e.g. if the diff/delta
       algorithm changes).
    DELTAREUSEFULLADD
       Revisions will be re-added as if they were new content. This is
       slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
       e.g. large file detection and handling.

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``forcedeltabothparents``
    argument controls whether to force compute deltas against both parents
    for merges. Any falsy value (including the default ``None``) keeps the
    destination revlog's current setting.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    Raises ``ValueError`` when ``deltareuse`` is not a known policy, when
    the destination is not empty, or when either revlog has filtered
    revisions.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(_(b'value for deltareuse invalid: %s') % deltareuse)

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # lazydelta and lazydeltabase controls whether to reuse a cached delta,
    # if possible. Remember the destination's settings so they can be put
    # back once the copy is over, whatever its outcome.
    saved_lazydelta = destrevlog._lazydelta
    saved_lazydeltabase = destrevlog._lazydeltabase
    saved_bothparents = destrevlog._deltabothparents

    try:
        # policy -> (lazydeltabase, lazydelta). DELTAREUSEFULLADD has no
        # entry: it leaves the destination's settings untouched.
        lazy_by_policy = {
            self.DELTAREUSEALWAYS: (True, True),
            self.DELTAREUSESAMEREVS: (False, True),
            self.DELTAREUSENEVER: (False, False),
        }
        lazy = lazy_by_policy.get(deltareuse)
        if lazy is not None:
            destrevlog._lazydeltabase, destrevlog._lazydelta = lazy

        destrevlog._deltabothparents = (
            forcedeltabothparents or saved_bothparents
        )

        self._clone(
            tr,
            destrevlog,
            addrevisioncb,
            deltareuse,
            forcedeltabothparents,
            sidedata_helpers,
        )

    finally:
        destrevlog._lazydelta = saved_lazydelta
        destrevlog._lazydeltabase = saved_lazydeltabase
        destrevlog._deltabothparents = saved_bothparents
2784
2791
def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """perform the core duty of `revlog.clone` after parameter processing

    Every revision of ``self`` is added to ``destrevlog`` in order, and
    ``addrevisioncb(self, rev, node)`` is called after each one when a
    callback is given.

    With ``DELTAREUSEFULLADD`` the revision goes through the regular
    ``destrevlog.addrevision`` path. Otherwise it is written with
    ``destrevlog._addrevision``, handing over the stored delta as
    ``cachedelta`` when ``destrevlog._lazydelta`` is set and the revision
    has a delta parent.

    ``forcedeltabothparents`` is accepted for signature compatibility with
    ``clone`` and is not read here.
    """
    deltacomputer = deltautil.deltacomputer(destrevlog)
    index = self.index
    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # (Possibly) reuse the delta from the revlog if allowed and
        # the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        if deltareuse == self.DELTAREUSEFULLADD:
            text, sidedata = self._revisiondata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                # new_flags is (flags to add, flags to remove). The
                # parentheses matter: `&` binds tighter than `|`, so
                # without them the removal mask only applies to the
                # added flags and nothing is ever cleared from `flags`.
                flags = (flags | new_flags[0]) & ~new_flags[1]

            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            if destrevlog._lazydelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            sidedata = None
            if not cachedelta:
                rawtext, sidedata = self._revisiondata(rev)
            if sidedata is None:
                sidedata = self.sidedata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                # Same precedence fix as in the full-add branch above.
                flags = (flags | new_flags[0]) & ~new_flags[1]

            ifh = destrevlog.opener(
                destrevlog._indexfile, b'a+', checkambig=False
            )
            dfh = None
            try:
                # Opened inside the try block so that the index handle is
                # released if opening the data file fails.
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog._datafile, b'a+')
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    ifh,
                    dfh,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            finally:
                if dfh:
                    dfh.close()
                ifh.close()

        if addrevisioncb:
            addrevisioncb(self, rev, node)
2879
2886
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the stored content of ``censornode`` with a tombstone.

    A temporary revlog (postfix ``tmpcensored``) receives a copy of every
    revision, with the censored one replaced by the packed tombstone and
    flagged ``REVIDX_ISCENSORED``. The temporary files are then renamed
    over this revlog's files and the index is reloaded.

    Raises ``error.RevlogError`` for version 0 revlogs, and
    ``error.Abort`` when the tombstone is longer than the censored data or
    when a censored revision is stored as a delta.
    """
    if self._format_version == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs')
            % self._format_version
        )

    censorrev = self.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    if self.rawsize(censorrev) < len(tombstone):
        raise error.Abort(
            _(b'censor tombstone must be no longer than censored data')
        )

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog(
        self.opener,
        target=self.target,
        radix=self.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    newrl._format_version = self._format_version
    newrl._format_flags = self._format_flags
    newrl._generaldelta = self._generaldelta
    newrl._parse_index = self._parse_index

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev != censorrev:
            # Ordinary revision: copy it over as-is. A revision censored
            # earlier is copied from its raw chunk, which is only possible
            # when it is not stored as a delta.
            if not self.iscensored(rev):
                rawtext = self.rawdata(rev)
            elif self.deltaparent(rev) == nullrev:
                rawtext = self._chunk(rev)
            else:
                raise error.Abort(
                    _(
                        b'cannot censor due to censored '
                        b'revision having delta stored'
                    )
                )

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )
            continue

        # The revision being censored: store the tombstone in its place.
        newrl.addrawrevision(
            tombstone,
            tr,
            self.linkrev(censorrev),
            p1,
            p2,
            censornode,
            REVIDX_ISCENSORED,
        )

        if newrl.deltaparent(rev) != nullrev:
            raise error.Abort(
                _(
                    b'censored revision stored as delta; '
                    b'cannot censor'
                ),
                hint=_(
                    b'censoring of revlogs is not '
                    b'fully implemented; please report '
                    b'this bug'
                ),
            )

    # Back up the current files, then move the rewritten ones in place.
    tr.addbackup(self._indexfile, location=b'store')
    if not self._inline:
        tr.addbackup(self._datafile, location=b'store')

    self.opener.rename(newrl._indexfile, self._indexfile)
    if not self._inline:
        self.opener.rename(newrl._datafile, self._datafile)

    # In-memory state still describes the old files; drop and reload it.
    self.clearcaches()
    self._loadindex()
2967
2974
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.

    ``state`` is a dict shared with the verifier. It is read for
    ``expectedversion``, ``erroroncensored`` and the optional
    ``skipflags``; ``skipread`` and ``safe_renamed`` are reset to empty
    sets here, and nodes that could not be checked are added to
    ``skipread``.
    """
    data_extra, index_extra = self.checksize()
    if data_extra:
        yield revlogproblem(
            error=_(b'data length off by %d bytes') % data_extra
        )
    if index_extra:
        yield revlogproblem(
            error=_(b'index contains %d extra bytes') % index_extra
        )

    version = self._format_version

    # The verifier tells us what version revlog we should be.
    expectedversion = state[b'expectedversion']
    if version != expectedversion:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self._indexfile, version, expectedversion)
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #   flags()             | 0      | 0      | 0     | not 0
        #   renamed()           | False  | True   | False | ?
        #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common  | rename | meta  | ext
        # -------------------------------------------------
        # rawsize()    | L1      | L1     | L1    | L1
        # size()       | L1      | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2      | L2     | L2    | L2
        # len(text)    | L2      | L2     | L2    | L3
        # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            # Restrict the skip mask to the flags this revision carries.
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                skipflags &= self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            # Length check: size recorded in the index (L1) against the
            # size of the raw text actually read back (L2).
            indexed_size = self.rawsize(rev)
            stored_size = len(self.rawdata(node))

            if indexed_size != stored_size:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected')
                    % (stored_size, indexed_size),
                    node=node,
                )

        except error.CensoredNodeError:
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
                state[b'skipread'].add(node)
        except Exception as e:
            # Any other failure is reported as a problem rather than
            # aborting the verification of the remaining revisions.
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(e)),
                node=node,
            )
            state[b'skipread'].add(node)
3072
3079
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict describing how this revlog is stored.

    Each argument is a switch; only the requested entries are computed
    and added to the result:

    - ``b'exclusivefiles'``: list of ``(opener, path)`` pairs for the
      index file and, when not inline, the data file
    - ``b'sharedfiles'``: always an empty list
    - ``b'revisionscount'``: ``len(self)``
    - ``b'trackedsize'``: sum of ``rawsize`` over all revisions
    - ``b'storedsize'``: sum of the on-disk sizes of ``self.files()``
    """
    info = {}

    if exclusivefiles:
        owned = [(self.opener, self._indexfile)]
        if not self._inline:
            owned.append((self.opener, self._datafile))
        info[b'exclusivefiles'] = owned

    if sharedfiles:
        info[b'sharedfiles'] = []

    if revisionscount:
        info[b'revisionscount'] = len(self)

    if trackedsize:
        info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        on_disk = 0
        for path in self.files():
            on_disk += self.opener.stat(path).st_size
        info[b'storedsize'] = on_disk

    return info
3103
3110
3104 def rewrite_sidedata(self, helpers, startrev, endrev):
3111 def rewrite_sidedata(self, helpers, startrev, endrev):
3105 if not self.hassidedata:
3112 if not self.hassidedata:
3106 return
3113 return
3107 # inline are not yet supported because they suffer from an issue when
3114 # inline are not yet supported because they suffer from an issue when
3108 # rewriting them (since it's not an append-only operation).
3115 # rewriting them (since it's not an append-only operation).
3109 # See issue6485.
3116 # See issue6485.
3110 assert not self._inline
3117 assert not self._inline
3111 if not helpers[1] and not helpers[2]:
3118 if not helpers[1] and not helpers[2]:
3112 # Nothing to generate or remove
3119 # Nothing to generate or remove
3113 return
3120 return
3114
3121
3115 # changelog implements a "delayed" writing mechanism that assumes that
3122 # changelog implements a "delayed" writing mechanism that assumes that
3116 # all index data is written in append mode and is therefore incompatible
3123 # all index data is written in append mode and is therefore incompatible
3117 # with the seeked write done in this method. The use of such "delayed"
3124 # with the seeked write done in this method. The use of such "delayed"
3118 # writing will soon be removed for revlog versions that support side
3125 # writing will soon be removed for revlog versions that support side
3119 # data, so for now, we only keep this simple assert to highlight the
3126 # data, so for now, we only keep this simple assert to highlight the
3120 # situation.
3127 # situation.
3121 delayed = getattr(self, '_delayed', False)
3128 delayed = getattr(self, '_delayed', False)
3122 diverted = getattr(self, '_divert', False)
3129 diverted = getattr(self, '_divert', False)
3123 if delayed and not diverted:
3130 if delayed and not diverted:
3124 msg = "cannot rewrite_sidedata of a delayed revlog"
3131 msg = "cannot rewrite_sidedata of a delayed revlog"
3125 raise error.ProgrammingError(msg)
3132 raise error.ProgrammingError(msg)
3126
3133
3127 new_entries = []
3134 new_entries = []
3128 # append the new sidedata
3135 # append the new sidedata
3129 with self._datafp(b'a+') as fp:
3136 with self._datafp(b'a+') as fp:
3130 # Maybe this bug still exists, see revlog._writeentry
3137 # Maybe this bug still exists, see revlog._writeentry
3131 fp.seek(0, os.SEEK_END)
3138 fp.seek(0, os.SEEK_END)
3132 current_offset = fp.tell()
3139 current_offset = fp.tell()
3133 for rev in range(startrev, endrev + 1):
3140 for rev in range(startrev, endrev + 1):
3134 entry = self.index[rev]
3141 entry = self.index[rev]
3135 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3142 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3136 store=self,
3143 store=self,
3137 sidedata_helpers=helpers,
3144 sidedata_helpers=helpers,
3138 sidedata={},
3145 sidedata={},
3139 rev=rev,
3146 rev=rev,
3140 )
3147 )
3141
3148
3142 serialized_sidedata = sidedatautil.serialize_sidedata(
3149 serialized_sidedata = sidedatautil.serialize_sidedata(
3143 new_sidedata
3150 new_sidedata
3144 )
3151 )
3145 if entry[8] != 0 or entry[9] != 0:
3152 if entry[8] != 0 or entry[9] != 0:
3146 # rewriting entries that already have sidedata is not
3153 # rewriting entries that already have sidedata is not
3147 # supported yet, because it introduces garbage data in the
3154 # supported yet, because it introduces garbage data in the
3148 # revlog.
3155 # revlog.
3149 msg = b"Rewriting existing sidedata is not supported yet"
3156 msg = b"Rewriting existing sidedata is not supported yet"
3150 raise error.Abort(msg)
3157 raise error.Abort(msg)
3151
3158
3152 # Apply (potential) flags to add and to remove after running
3159 # Apply (potential) flags to add and to remove after running
3153 # the sidedata helpers
3160 # the sidedata helpers
3154 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3161 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3155 entry = (new_offset_flags,) + entry[1:8]
3162 entry = (new_offset_flags,) + entry[1:8]
3156 entry += (current_offset, len(serialized_sidedata))
3163 entry += (current_offset, len(serialized_sidedata))
3157
3164
3158 fp.write(serialized_sidedata)
3165 fp.write(serialized_sidedata)
3159 new_entries.append(entry)
3166 new_entries.append(entry)
3160 current_offset += len(serialized_sidedata)
3167 current_offset += len(serialized_sidedata)
3161
3168
3162 # rewrite the new index entries
3169 # rewrite the new index entries
3163 with self._indexfp(b'r+') as fp:
3170 with self._indexfp(b'r+') as fp:
3164 fp.seek(startrev * self.index.entry_size)
3171 fp.seek(startrev * self.index.entry_size)
3165 for i, e in enumerate(new_entries):
3172 for i, e in enumerate(new_entries):
3166 rev = startrev + i
3173 rev = startrev + i
3167 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3174 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3168 packed = self.index.entry_binary(rev)
3175 packed = self.index.entry_binary(rev)
3169 if rev == 0:
3176 if rev == 0:
3170 header = self._format_flags | self._format_version
3177 header = self._format_flags | self._format_version
3171 header = self.index.pack_header(header)
3178 header = self.index.pack_header(header)
3172 packed = header + packed
3179 packed = header + packed
3173 fp.write(packed)
3180 fp.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now