##// END OF EJS Templates
revlog: simplify a conditionnal in _enforceinlinesize...
marmoute -
r47938:88bd08a6 default
parent child Browse files
Show More
@@ -1,3175 +1,3173
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
58 )
59 from .thirdparty import attr
59 from .thirdparty import attr
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 error,
63 error,
64 mdiff,
64 mdiff,
65 policy,
65 policy,
66 pycompat,
66 pycompat,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .interfaces import (
70 from .interfaces import (
71 repository,
71 repository,
72 util as interfaceutil,
72 util as interfaceutil,
73 )
73 )
74 from .revlogutils import (
74 from .revlogutils import (
75 deltas as deltautil,
75 deltas as deltautil,
76 flagutil,
76 flagutil,
77 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanket usage of all the names to prevent pyflakes complaints
86 # blanket usage of all the names to prevent pyflakes complaints
87 # We need these names available in the module for extensions.
87 # We need these names available in the module for extensions.
88
88
89 REVLOGV0
89 REVLOGV0
90 REVLOGV1
90 REVLOGV1
91 REVLOGV2
91 REVLOGV2
92 FLAG_INLINE_DATA
92 FLAG_INLINE_DATA
93 FLAG_GENERALDELTA
93 FLAG_GENERALDELTA
94 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FLAGS
95 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_FORMAT
96 REVLOG_DEFAULT_VERSION
96 REVLOG_DEFAULT_VERSION
97 REVLOGV1_FLAGS
97 REVLOGV1_FLAGS
98 REVLOGV2_FLAGS
98 REVLOGV2_FLAGS
99 REVIDX_ISCENSORED
99 REVIDX_ISCENSORED
100 REVIDX_ELLIPSIS
100 REVIDX_ELLIPSIS
101 REVIDX_HASCOPIESINFO
101 REVIDX_HASCOPIESINFO
102 REVIDX_EXTSTORED
102 REVIDX_EXTSTORED
103 REVIDX_DEFAULT_FLAGS
103 REVIDX_DEFAULT_FLAGS
104 REVIDX_FLAGS_ORDER
104 REVIDX_FLAGS_ORDER
105 REVIDX_RAWTEXT_CHANGING_FLAGS
105 REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
107 parsers = policy.importmod('parsers')
107 parsers = policy.importmod('parsers')
108 rustancestor = policy.importrust('ancestor')
108 rustancestor = policy.importrust('ancestor')
109 rustdagop = policy.importrust('dagop')
109 rustdagop = policy.importrust('dagop')
110 rustrevlog = policy.importrust('revlog')
110 rustrevlog = policy.importrust('revlog')
111
111
112 # Aliased for performance.
112 # Aliased for performance.
113 _zlibdecompress = zlib.decompress
113 _zlibdecompress = zlib.decompress
114
114
115 # max size of revlog with inline data
115 # max size of revlog with inline data
116 _maxinline = 131072
116 _maxinline = 131072
117 _chunksize = 1048576
117 _chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
120 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
121 return text, False
121 return text, False
122
122
123
123
124 def ellipsiswriteprocessor(rl, text):
124 def ellipsiswriteprocessor(rl, text):
125 return text, False
125 return text, False
126
126
127
127
128 def ellipsisrawprocessor(rl, text):
128 def ellipsisrawprocessor(rl, text):
129 return False
129 return False
130
130
131
131
132 ellipsisprocessor = (
132 ellipsisprocessor = (
133 ellipsisreadprocessor,
133 ellipsisreadprocessor,
134 ellipsiswriteprocessor,
134 ellipsiswriteprocessor,
135 ellipsisrawprocessor,
135 ellipsisrawprocessor,
136 )
136 )
137
137
138
138
139 def offset_type(offset, type):
139 def offset_type(offset, type):
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 raise ValueError(b'unknown revlog index flags')
141 raise ValueError(b'unknown revlog index flags')
142 return int(int(offset) << 16 | type)
142 return int(int(offset) << 16 | type)
143
143
144
144
145 def _verify_revision(rl, skipflags, state, node):
145 def _verify_revision(rl, skipflags, state, node):
146 """Verify the integrity of the given revlog ``node`` while providing a hook
146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 point for extensions to influence the operation."""
147 point for extensions to influence the operation."""
148 if skipflags:
148 if skipflags:
149 state[b'skipread'].add(node)
149 state[b'skipread'].add(node)
150 else:
150 else:
151 # Side-effect: read content and verify hash.
151 # Side-effect: read content and verify hash.
152 rl.revision(node)
152 rl.revision(node)
153
153
154
154
155 # True if a fast implementation for persistent-nodemap is available
155 # True if a fast implementation for persistent-nodemap is available
156 #
156 #
157 # We also consider we have a "fast" implementation in "pure" python because
157 # We also consider we have a "fast" implementation in "pure" python because
158 # people using pure don't really have performance considerations (and have a
158 # people using pure don't really have performance considerations (and have a
159 # wheelbarrow of other sources of slowness)
159 # wheelbarrow of other sources of slowness)
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 parsers, 'BaseIndexObject'
161 parsers, 'BaseIndexObject'
162 )
162 )
163
163
164
164
165 @attr.s(slots=True, frozen=True)
165 @attr.s(slots=True, frozen=True)
166 class _revisioninfo(object):
166 class _revisioninfo(object):
167 """Information about a revision that allows building its fulltext
167 """Information about a revision that allows building its fulltext
168 node: expected hash of the revision
168 node: expected hash of the revision
169 p1, p2: parent revs of the revision
169 p1, p2: parent revs of the revision
170 btext: built text cache consisting of a one-element list
170 btext: built text cache consisting of a one-element list
171 cachedelta: (baserev, uncompressed_delta) or None
171 cachedelta: (baserev, uncompressed_delta) or None
172 flags: flags associated to the revision storage
172 flags: flags associated to the revision storage
173
173
174 One of btext[0] or cachedelta must be set.
174 One of btext[0] or cachedelta must be set.
175 """
175 """
176
176
177 node = attr.ib()
177 node = attr.ib()
178 p1 = attr.ib()
178 p1 = attr.ib()
179 p2 = attr.ib()
179 p2 = attr.ib()
180 btext = attr.ib()
180 btext = attr.ib()
181 textlen = attr.ib()
181 textlen = attr.ib()
182 cachedelta = attr.ib()
182 cachedelta = attr.ib()
183 flags = attr.ib()
183 flags = attr.ib()
184
184
185
185
186 @interfaceutil.implementer(repository.irevisiondelta)
186 @interfaceutil.implementer(repository.irevisiondelta)
187 @attr.s(slots=True)
187 @attr.s(slots=True)
188 class revlogrevisiondelta(object):
188 class revlogrevisiondelta(object):
189 node = attr.ib()
189 node = attr.ib()
190 p1node = attr.ib()
190 p1node = attr.ib()
191 p2node = attr.ib()
191 p2node = attr.ib()
192 basenode = attr.ib()
192 basenode = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194 baserevisionsize = attr.ib()
194 baserevisionsize = attr.ib()
195 revision = attr.ib()
195 revision = attr.ib()
196 delta = attr.ib()
196 delta = attr.ib()
197 sidedata = attr.ib()
197 sidedata = attr.ib()
198 protocol_flags = attr.ib()
198 protocol_flags = attr.ib()
199 linknode = attr.ib(default=None)
199 linknode = attr.ib(default=None)
200
200
201
201
202 @interfaceutil.implementer(repository.iverifyproblem)
202 @interfaceutil.implementer(repository.iverifyproblem)
203 @attr.s(frozen=True)
203 @attr.s(frozen=True)
204 class revlogproblem(object):
204 class revlogproblem(object):
205 warning = attr.ib(default=None)
205 warning = attr.ib(default=None)
206 error = attr.ib(default=None)
206 error = attr.ib(default=None)
207 node = attr.ib(default=None)
207 node = attr.ib(default=None)
208
208
209
209
210 def parse_index_v1(data, inline):
210 def parse_index_v1(data, inline):
211 # call the C implementation to parse the index data
211 # call the C implementation to parse the index data
212 index, cache = parsers.parse_index2(data, inline)
212 index, cache = parsers.parse_index2(data, inline)
213 return index, cache
213 return index, cache
214
214
215
215
216 def parse_index_v2(data, inline):
216 def parse_index_v2(data, inline):
217 # call the C implementation to parse the index data
217 # call the C implementation to parse the index data
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 return index, cache
219 return index, cache
220
220
221
221
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223
223
224 def parse_index_v1_nodemap(data, inline):
224 def parse_index_v1_nodemap(data, inline):
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 return index, cache
226 return index, cache
227
227
228
228
229 else:
229 else:
230 parse_index_v1_nodemap = None
230 parse_index_v1_nodemap = None
231
231
232
232
233 def parse_index_v1_mixed(data, inline):
233 def parse_index_v1_mixed(data, inline):
234 index, cache = parse_index_v1(data, inline)
234 index, cache = parse_index_v1(data, inline)
235 return rustrevlog.MixedIndex(index), cache
235 return rustrevlog.MixedIndex(index), cache
236
236
237
237
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 # signed integer)
239 # signed integer)
240 _maxentrysize = 0x7FFFFFFF
240 _maxentrysize = 0x7FFFFFFF
241
241
242
242
243 class revlog(object):
243 class revlog(object):
244 """
244 """
245 the underlying revision storage object
245 the underlying revision storage object
246
246
247 A revlog consists of two parts, an index and the revision data.
247 A revlog consists of two parts, an index and the revision data.
248
248
249 The index is a file with a fixed record size containing
249 The index is a file with a fixed record size containing
250 information on each revision, including its nodeid (hash), the
250 information on each revision, including its nodeid (hash), the
251 nodeids of its parents, the position and offset of its data within
251 nodeids of its parents, the position and offset of its data within
252 the data file, and the revision it's based on. Finally, each entry
252 the data file, and the revision it's based on. Finally, each entry
253 contains a linkrev entry that can serve as a pointer to external
253 contains a linkrev entry that can serve as a pointer to external
254 data.
254 data.
255
255
256 The revision data itself is a linear collection of data chunks.
256 The revision data itself is a linear collection of data chunks.
257 Each chunk represents a revision and is usually represented as a
257 Each chunk represents a revision and is usually represented as a
258 delta against the previous chunk. To bound lookup time, runs of
258 delta against the previous chunk. To bound lookup time, runs of
259 deltas are limited to about 2 times the length of the original
259 deltas are limited to about 2 times the length of the original
260 version data. This makes retrieval of a version proportional to
260 version data. This makes retrieval of a version proportional to
261 its size, or O(1) relative to the number of revisions.
261 its size, or O(1) relative to the number of revisions.
262
262
263 Both pieces of the revlog are written to in an append-only
263 Both pieces of the revlog are written to in an append-only
264 fashion, which means we never need to rewrite a file to insert or
264 fashion, which means we never need to rewrite a file to insert or
265 remove data, and can use some simple techniques to avoid the need
265 remove data, and can use some simple techniques to avoid the need
266 for locking while reading.
266 for locking while reading.
267
267
268 If checkambig, indexfile is opened with checkambig=True at
268 If checkambig, indexfile is opened with checkambig=True at
269 writing, to avoid file stat ambiguity.
269 writing, to avoid file stat ambiguity.
270
270
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 index will be mmapped rather than read if it is larger than the
272 index will be mmapped rather than read if it is larger than the
273 configured threshold.
273 configured threshold.
274
274
275 If censorable is True, the revlog can have censored revisions.
275 If censorable is True, the revlog can have censored revisions.
276
276
277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 compression for the data content.
278 compression for the data content.
279
279
280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 file handle, a filename, and an expected position. It should check whether
281 file handle, a filename, and an expected position. It should check whether
282 the current position in the file handle is valid, and log/warn/fail (by
282 the current position in the file handle is valid, and log/warn/fail (by
283 raising).
283 raising).
284 """
284 """
285
285
286 _flagserrorclass = error.RevlogError
286 _flagserrorclass = error.RevlogError
287
287
288 def __init__(
288 def __init__(
289 self,
289 self,
290 opener,
290 opener,
291 target,
291 target,
292 radix,
292 radix,
293 postfix=None,
293 postfix=None,
294 checkambig=False,
294 checkambig=False,
295 mmaplargeindex=False,
295 mmaplargeindex=False,
296 censorable=False,
296 censorable=False,
297 upperboundcomp=None,
297 upperboundcomp=None,
298 persistentnodemap=False,
298 persistentnodemap=False,
299 concurrencychecker=None,
299 concurrencychecker=None,
300 ):
300 ):
301 """
301 """
302 create a revlog object
302 create a revlog object
303
303
304 opener is a function that abstracts the file opening operation
304 opener is a function that abstracts the file opening operation
305 and can be used to implement COW semantics or the like.
305 and can be used to implement COW semantics or the like.
306
306
307 `target`: a (KIND, ID) tuple that identifies the content stored in
307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 this revlog. It helps the rest of the code understand what the revlog
308 this revlog. It helps the rest of the code understand what the revlog
309 is about without having to resort to heuristics and index filename
309 is about without having to resort to heuristics and index filename
310 analysis. Note that this must be reliably set by normal code, but
310 analysis. Note that this must be reliably set by normal code, but
311 test, debug, or performance measurement code might not set it to an
311 test, debug, or performance measurement code might not set it to an
312 accurate value.
312 accurate value.
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315
315
316 self.radix = radix
316 self.radix = radix
317
317
318 if postfix is None:
318 if postfix is None:
319 indexfile = b'%s.i' % self.radix
319 indexfile = b'%s.i' % self.radix
320 datafile = b'%s.d' % self.radix
320 datafile = b'%s.d' % self.radix
321 elif postfix == b'a':
321 elif postfix == b'a':
322 indexfile = b'%s.i.a' % self.radix
322 indexfile = b'%s.i.a' % self.radix
323 datafile = b'%s.d' % self.radix
323 datafile = b'%s.d' % self.radix
324 else:
324 else:
325 indexfile = b'%s.i.%s' % (self.radix, postfix)
325 indexfile = b'%s.i.%s' % (self.radix, postfix)
326 datafile = b'%s.d.%s' % (self.radix, postfix)
326 datafile = b'%s.d.%s' % (self.radix, postfix)
327
327
328 self._indexfile = indexfile
328 self._indexfile = indexfile
329 self._datafile = datafile
329 self._datafile = datafile
330 self._nodemap_file = None
330 self._nodemap_file = None
331 self.postfix = postfix
331 self.postfix = postfix
332 self.opener = opener
332 self.opener = opener
333 if persistentnodemap:
333 if persistentnodemap:
334 self._nodemap_file = nodemaputil.get_nodemap_file(self)
334 self._nodemap_file = nodemaputil.get_nodemap_file(self)
335
335
336 assert target[0] in ALL_KINDS
336 assert target[0] in ALL_KINDS
337 assert len(target) == 2
337 assert len(target) == 2
338 self.target = target
338 self.target = target
339 # When True, indexfile is opened with checkambig=True at writing, to
339 # When True, indexfile is opened with checkambig=True at writing, to
340 # avoid file stat ambiguity.
340 # avoid file stat ambiguity.
341 self._checkambig = checkambig
341 self._checkambig = checkambig
342 self._mmaplargeindex = mmaplargeindex
342 self._mmaplargeindex = mmaplargeindex
343 self._censorable = censorable
343 self._censorable = censorable
344 # 3-tuple of (node, rev, text) for a raw revision.
344 # 3-tuple of (node, rev, text) for a raw revision.
345 self._revisioncache = None
345 self._revisioncache = None
346 # Maps rev to chain base rev.
346 # Maps rev to chain base rev.
347 self._chainbasecache = util.lrucachedict(100)
347 self._chainbasecache = util.lrucachedict(100)
348 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
349 self._chunkcache = (0, b'')
349 self._chunkcache = (0, b'')
350 # How much data to read and cache into the raw revlog data cache.
350 # How much data to read and cache into the raw revlog data cache.
351 self._chunkcachesize = 65536
351 self._chunkcachesize = 65536
352 self._maxchainlen = None
352 self._maxchainlen = None
353 self._deltabothparents = True
353 self._deltabothparents = True
354 self.index = None
354 self.index = None
355 self._nodemap_docket = None
355 self._nodemap_docket = None
356 # Mapping of partial identifiers to full nodes.
356 # Mapping of partial identifiers to full nodes.
357 self._pcache = {}
357 self._pcache = {}
358 # Mapping of revision integer to full node.
358 # Mapping of revision integer to full node.
359 self._compengine = b'zlib'
359 self._compengine = b'zlib'
360 self._compengineopts = {}
360 self._compengineopts = {}
361 self._maxdeltachainspan = -1
361 self._maxdeltachainspan = -1
362 self._withsparseread = False
362 self._withsparseread = False
363 self._sparserevlog = False
363 self._sparserevlog = False
364 self._srdensitythreshold = 0.50
364 self._srdensitythreshold = 0.50
365 self._srmingapsize = 262144
365 self._srmingapsize = 262144
366
366
367 # Make copy of flag processors so each revlog instance can support
367 # Make copy of flag processors so each revlog instance can support
368 # custom flags.
368 # custom flags.
369 self._flagprocessors = dict(flagutil.flagprocessors)
369 self._flagprocessors = dict(flagutil.flagprocessors)
370
370
371 # 2-tuple of file handles being used for active writing.
371 # 2-tuple of file handles being used for active writing.
372 self._writinghandles = None
372 self._writinghandles = None
373
373
374 self._loadindex()
374 self._loadindex()
375
375
376 self._concurrencychecker = concurrencychecker
376 self._concurrencychecker = concurrencychecker
377
377
378 def _init_opts(self):
378 def _init_opts(self):
379 """process options (from above/config) to set up the associated default revlog mode
379 """process options (from above/config) to set up the associated default revlog mode
380
380
381 These values might be affected when actually reading on-disk information.
381 These values might be affected when actually reading on-disk information.
382
382
383 The relevant values are returned for use in _loadindex().
383 The relevant values are returned for use in _loadindex().
384
384
385 * newversionflags:
385 * newversionflags:
386 version header to use if we need to create a new revlog
386 version header to use if we need to create a new revlog
387
387
388 * mmapindexthreshold:
388 * mmapindexthreshold:
389 minimal index size at which to start using mmap
389 minimal index size at which to start using mmap
390
390
391 * force_nodemap:
391 * force_nodemap:
392 force the usage of a "development" version of the nodemap code
392 force the usage of a "development" version of the nodemap code
393 """
393 """
394 mmapindexthreshold = None
394 mmapindexthreshold = None
395 opts = self.opener.options
395 opts = self.opener.options
396
396
397 if b'revlogv2' in opts:
397 if b'revlogv2' in opts:
398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
399 elif b'revlogv1' in opts:
399 elif b'revlogv1' in opts:
400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
401 if b'generaldelta' in opts:
401 if b'generaldelta' in opts:
402 newversionflags |= FLAG_GENERALDELTA
402 newversionflags |= FLAG_GENERALDELTA
403 elif b'revlogv0' in self.opener.options:
403 elif b'revlogv0' in self.opener.options:
404 newversionflags = REVLOGV0
404 newversionflags = REVLOGV0
405 else:
405 else:
406 newversionflags = REVLOG_DEFAULT_VERSION
406 newversionflags = REVLOG_DEFAULT_VERSION
407
407
408 if b'chunkcachesize' in opts:
408 if b'chunkcachesize' in opts:
409 self._chunkcachesize = opts[b'chunkcachesize']
409 self._chunkcachesize = opts[b'chunkcachesize']
410 if b'maxchainlen' in opts:
410 if b'maxchainlen' in opts:
411 self._maxchainlen = opts[b'maxchainlen']
411 self._maxchainlen = opts[b'maxchainlen']
412 if b'deltabothparents' in opts:
412 if b'deltabothparents' in opts:
413 self._deltabothparents = opts[b'deltabothparents']
413 self._deltabothparents = opts[b'deltabothparents']
414 self._lazydelta = bool(opts.get(b'lazydelta', True))
414 self._lazydelta = bool(opts.get(b'lazydelta', True))
415 self._lazydeltabase = False
415 self._lazydeltabase = False
416 if self._lazydelta:
416 if self._lazydelta:
417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
418 if b'compengine' in opts:
418 if b'compengine' in opts:
419 self._compengine = opts[b'compengine']
419 self._compengine = opts[b'compengine']
420 if b'zlib.level' in opts:
420 if b'zlib.level' in opts:
421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
422 if b'zstd.level' in opts:
422 if b'zstd.level' in opts:
423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
424 if b'maxdeltachainspan' in opts:
424 if b'maxdeltachainspan' in opts:
425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
427 mmapindexthreshold = opts[b'mmapindexthreshold']
427 mmapindexthreshold = opts[b'mmapindexthreshold']
428 self.hassidedata = bool(opts.get(b'side-data', False))
428 self.hassidedata = bool(opts.get(b'side-data', False))
429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430 withsparseread = bool(opts.get(b'with-sparse-read', False))
430 withsparseread = bool(opts.get(b'with-sparse-read', False))
431 # sparse-revlog forces sparse-read
431 # sparse-revlog forces sparse-read
432 self._withsparseread = self._sparserevlog or withsparseread
432 self._withsparseread = self._sparserevlog or withsparseread
433 if b'sparse-read-density-threshold' in opts:
433 if b'sparse-read-density-threshold' in opts:
434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435 if b'sparse-read-min-gap-size' in opts:
435 if b'sparse-read-min-gap-size' in opts:
436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
437 if opts.get(b'enableellipsis'):
437 if opts.get(b'enableellipsis'):
438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439
439
440 # revlog v0 doesn't have flag processors
440 # revlog v0 doesn't have flag processors
441 for flag, processor in pycompat.iteritems(
441 for flag, processor in pycompat.iteritems(
442 opts.get(b'flagprocessors', {})
442 opts.get(b'flagprocessors', {})
443 ):
443 ):
444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445
445
446 if self._chunkcachesize <= 0:
446 if self._chunkcachesize <= 0:
447 raise error.RevlogError(
447 raise error.RevlogError(
448 _(b'revlog chunk cache size %r is not greater than 0')
448 _(b'revlog chunk cache size %r is not greater than 0')
449 % self._chunkcachesize
449 % self._chunkcachesize
450 )
450 )
451 elif self._chunkcachesize & (self._chunkcachesize - 1):
451 elif self._chunkcachesize & (self._chunkcachesize - 1):
452 raise error.RevlogError(
452 raise error.RevlogError(
453 _(b'revlog chunk cache size %r is not a power of 2')
453 _(b'revlog chunk cache size %r is not a power of 2')
454 % self._chunkcachesize
454 % self._chunkcachesize
455 )
455 )
456 force_nodemap = opts.get(b'devel-force-nodemap', False)
456 force_nodemap = opts.get(b'devel-force-nodemap', False)
457 return newversionflags, mmapindexthreshold, force_nodemap
457 return newversionflags, mmapindexthreshold, force_nodemap
458
458
459 def _loadindex(self):
459 def _loadindex(self):
460
460
461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
462 indexdata = b''
462 indexdata = b''
463 self._initempty = True
463 self._initempty = True
464 try:
464 try:
465 with self._indexfp() as f:
465 with self._indexfp() as f:
466 if (
466 if (
467 mmapindexthreshold is not None
467 mmapindexthreshold is not None
468 and self.opener.fstat(f).st_size >= mmapindexthreshold
468 and self.opener.fstat(f).st_size >= mmapindexthreshold
469 ):
469 ):
470 # TODO: should .close() to release resources without
470 # TODO: should .close() to release resources without
471 # relying on Python GC
471 # relying on Python GC
472 indexdata = util.buffer(util.mmapread(f))
472 indexdata = util.buffer(util.mmapread(f))
473 else:
473 else:
474 indexdata = f.read()
474 indexdata = f.read()
475 if len(indexdata) > 0:
475 if len(indexdata) > 0:
476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
477 self._initempty = False
477 self._initempty = False
478 else:
478 else:
479 versionflags = newversionflags
479 versionflags = newversionflags
480 except IOError as inst:
480 except IOError as inst:
481 if inst.errno != errno.ENOENT:
481 if inst.errno != errno.ENOENT:
482 raise
482 raise
483
483
484 versionflags = newversionflags
484 versionflags = newversionflags
485
485
486 flags = self._format_flags = versionflags & ~0xFFFF
486 flags = self._format_flags = versionflags & ~0xFFFF
487 fmt = self._format_version = versionflags & 0xFFFF
487 fmt = self._format_version = versionflags & 0xFFFF
488
488
489 if fmt == REVLOGV0:
489 if fmt == REVLOGV0:
490 if flags:
490 if flags:
491 raise error.RevlogError(
491 raise error.RevlogError(
492 _(b'unknown flags (%#04x) in version %d revlog %s')
492 _(b'unknown flags (%#04x) in version %d revlog %s')
493 % (flags >> 16, fmt, self.display_id)
493 % (flags >> 16, fmt, self.display_id)
494 )
494 )
495
495
496 self._inline = False
496 self._inline = False
497 self._generaldelta = False
497 self._generaldelta = False
498
498
499 elif fmt == REVLOGV1:
499 elif fmt == REVLOGV1:
500 if flags & ~REVLOGV1_FLAGS:
500 if flags & ~REVLOGV1_FLAGS:
501 raise error.RevlogError(
501 raise error.RevlogError(
502 _(b'unknown flags (%#04x) in version %d revlog %s')
502 _(b'unknown flags (%#04x) in version %d revlog %s')
503 % (flags >> 16, fmt, self.display_id)
503 % (flags >> 16, fmt, self.display_id)
504 )
504 )
505
505
506 self._inline = versionflags & FLAG_INLINE_DATA
506 self._inline = versionflags & FLAG_INLINE_DATA
507 self._generaldelta = versionflags & FLAG_GENERALDELTA
507 self._generaldelta = versionflags & FLAG_GENERALDELTA
508
508
509 elif fmt == REVLOGV2:
509 elif fmt == REVLOGV2:
510 if flags & ~REVLOGV2_FLAGS:
510 if flags & ~REVLOGV2_FLAGS:
511 raise error.RevlogError(
511 raise error.RevlogError(
512 _(b'unknown flags (%#04x) in version %d revlog %s')
512 _(b'unknown flags (%#04x) in version %d revlog %s')
513 % (flags >> 16, fmt, self.display_id)
513 % (flags >> 16, fmt, self.display_id)
514 )
514 )
515
515
516 # There is a bug in the transaction handling when going from an
516 # There is a bug in the transaction handling when going from an
517 # inline revlog to a separate index and data file. Turn it off until
517 # inline revlog to a separate index and data file. Turn it off until
518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
519 # See issue6485
519 # See issue6485
520 self._inline = False
520 self._inline = False
521 # generaldelta implied by version 2 revlogs.
521 # generaldelta implied by version 2 revlogs.
522 self._generaldelta = True
522 self._generaldelta = True
523
523
524 else:
524 else:
525 raise error.RevlogError(
525 raise error.RevlogError(
526 _(b'unknown version (%d) in revlog %s') % (fmt, self.display_id)
526 _(b'unknown version (%d) in revlog %s') % (fmt, self.display_id)
527 )
527 )
528
528
529 self.nodeconstants = sha1nodeconstants
529 self.nodeconstants = sha1nodeconstants
530 self.nullid = self.nodeconstants.nullid
530 self.nullid = self.nodeconstants.nullid
531
531
532 # sparse-revlog can't be on without general-delta (issue6056)
532 # sparse-revlog can't be on without general-delta (issue6056)
533 if not self._generaldelta:
533 if not self._generaldelta:
534 self._sparserevlog = False
534 self._sparserevlog = False
535
535
536 self._storedeltachains = True
536 self._storedeltachains = True
537
537
538 devel_nodemap = (
538 devel_nodemap = (
539 self._nodemap_file
539 self._nodemap_file
540 and force_nodemap
540 and force_nodemap
541 and parse_index_v1_nodemap is not None
541 and parse_index_v1_nodemap is not None
542 )
542 )
543
543
544 use_rust_index = False
544 use_rust_index = False
545 if rustrevlog is not None:
545 if rustrevlog is not None:
546 if self._nodemap_file is not None:
546 if self._nodemap_file is not None:
547 use_rust_index = True
547 use_rust_index = True
548 else:
548 else:
549 use_rust_index = self.opener.options.get(b'rust.index')
549 use_rust_index = self.opener.options.get(b'rust.index')
550
550
551 self._parse_index = parse_index_v1
551 self._parse_index = parse_index_v1
552 if self._format_version == REVLOGV0:
552 if self._format_version == REVLOGV0:
553 self._parse_index = revlogv0.parse_index_v0
553 self._parse_index = revlogv0.parse_index_v0
554 elif fmt == REVLOGV2:
554 elif fmt == REVLOGV2:
555 self._parse_index = parse_index_v2
555 self._parse_index = parse_index_v2
556 elif devel_nodemap:
556 elif devel_nodemap:
557 self._parse_index = parse_index_v1_nodemap
557 self._parse_index = parse_index_v1_nodemap
558 elif use_rust_index:
558 elif use_rust_index:
559 self._parse_index = parse_index_v1_mixed
559 self._parse_index = parse_index_v1_mixed
560 try:
560 try:
561 d = self._parse_index(indexdata, self._inline)
561 d = self._parse_index(indexdata, self._inline)
562 index, _chunkcache = d
562 index, _chunkcache = d
563 use_nodemap = (
563 use_nodemap = (
564 not self._inline
564 not self._inline
565 and self._nodemap_file is not None
565 and self._nodemap_file is not None
566 and util.safehasattr(index, 'update_nodemap_data')
566 and util.safehasattr(index, 'update_nodemap_data')
567 )
567 )
568 if use_nodemap:
568 if use_nodemap:
569 nodemap_data = nodemaputil.persisted_data(self)
569 nodemap_data = nodemaputil.persisted_data(self)
570 if nodemap_data is not None:
570 if nodemap_data is not None:
571 docket = nodemap_data[0]
571 docket = nodemap_data[0]
572 if (
572 if (
573 len(d[0]) > docket.tip_rev
573 len(d[0]) > docket.tip_rev
574 and d[0][docket.tip_rev][7] == docket.tip_node
574 and d[0][docket.tip_rev][7] == docket.tip_node
575 ):
575 ):
576 # no changelog tampering
576 # no changelog tampering
577 self._nodemap_docket = docket
577 self._nodemap_docket = docket
578 index.update_nodemap_data(*nodemap_data)
578 index.update_nodemap_data(*nodemap_data)
579 except (ValueError, IndexError):
579 except (ValueError, IndexError):
580 raise error.RevlogError(
580 raise error.RevlogError(
581 _(b"index %s is corrupted") % self.display_id
581 _(b"index %s is corrupted") % self.display_id
582 )
582 )
583 self.index, self._chunkcache = d
583 self.index, self._chunkcache = d
584 if not self._chunkcache:
584 if not self._chunkcache:
585 self._chunkclear()
585 self._chunkclear()
586 # revnum -> (chain-length, sum-delta-length)
586 # revnum -> (chain-length, sum-delta-length)
587 self._chaininfocache = util.lrucachedict(500)
587 self._chaininfocache = util.lrucachedict(500)
588 # revlog header -> revlog compressor
588 # revlog header -> revlog compressor
589 self._decompressors = {}
589 self._decompressors = {}
590
590
@util.propertycache
def revlog_kind(self):
    """Return the first element of ``self.target``."""
    target = self.target
    return target[0]
594
594
@util.propertycache
def display_id(self):
    """The public facing "ID" of the revlog that we use in message"""
    # For now this is simply `self.radix`; building a user facing
    # representation of `revlog.target` might be a better choice.
    radix = self.radix
    return radix
601
601
@util.propertycache
def _compressor(self):
    """Build the revlog compressor of the configured compression engine.

    The engine is looked up in ``util.compengines`` under
    ``self._compengine`` and is given ``self._compengineopts``.
    """
    return util.compengines[self._compengine].revlogcompressor(
        self._compengineopts
    )
606
606
607 def _indexfp(self, mode=b'r'):
607 def _indexfp(self, mode=b'r'):
608 """file object for the revlog's index file"""
608 """file object for the revlog's index file"""
609 args = {'mode': mode}
609 args = {'mode': mode}
610 if mode != b'r':
610 if mode != b'r':
611 args['checkambig'] = self._checkambig
611 args['checkambig'] = self._checkambig
612 if mode == b'w':
612 if mode == b'w':
613 args['atomictemp'] = True
613 args['atomictemp'] = True
614 return self.opener(self._indexfile, **args)
614 return self.opener(self._indexfile, **args)
615
615
616 def _datafp(self, mode=b'r'):
616 def _datafp(self, mode=b'r'):
617 """file object for the revlog's data file"""
617 """file object for the revlog's data file"""
618 return self.opener(self._datafile, mode=mode)
618 return self.opener(self._datafile, mode=mode)
619
619
620 @contextlib.contextmanager
620 @contextlib.contextmanager
621 def _datareadfp(self, existingfp=None):
621 def _datareadfp(self, existingfp=None):
622 """file object suitable to read data"""
622 """file object suitable to read data"""
623 # Use explicit file handle, if given.
623 # Use explicit file handle, if given.
624 if existingfp is not None:
624 if existingfp is not None:
625 yield existingfp
625 yield existingfp
626
626
627 # Use a file handle being actively used for writes, if available.
627 # Use a file handle being actively used for writes, if available.
628 # There is some danger to doing this because reads will seek the
628 # There is some danger to doing this because reads will seek the
629 # file. However, _writeentry() performs a SEEK_END before all writes,
629 # file. However, _writeentry() performs a SEEK_END before all writes,
630 # so we should be safe.
630 # so we should be safe.
631 elif self._writinghandles:
631 elif self._writinghandles:
632 if self._inline:
632 if self._inline:
633 yield self._writinghandles[0]
633 yield self._writinghandles[0]
634 else:
634 else:
635 yield self._writinghandles[1]
635 yield self._writinghandles[1]
636
636
637 # Otherwise open a new file handle.
637 # Otherwise open a new file handle.
638 else:
638 else:
639 if self._inline:
639 if self._inline:
640 func = self._indexfp
640 func = self._indexfp
641 else:
641 else:
642 func = self._datafp
642 func = self._datafp
643 with func() as fp:
643 with func() as fp:
644 yield fp
644 yield fp
645
645
def tiprev(self):
    """Return the last revision number, i.e. ``len(self.index) - 1``."""
    count = len(self.index)
    return count - 1
648
648
def tip(self):
    """Return the node of the revision reported by ``self.tiprev()``."""
    last = self.tiprev()
    return self.node(last)
651
651
652 def __contains__(self, rev):
652 def __contains__(self, rev):
653 return 0 <= rev < len(self)
653 return 0 <= rev < len(self)
654
654
655 def __len__(self):
655 def __len__(self):
656 return len(self.index)
656 return len(self.index)
657
657
def __iter__(self):
    """Iterate over the revision numbers ``0 .. len(self) - 1``."""
    revcount = len(self)
    return iter(pycompat.xrange(revcount))
660
660
def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)

    The iteration itself is delegated to ``storageutil.iterrevs``.
    """
    revcount = len(self)
    return storageutil.iterrevs(revcount, start=start, stop=stop)
664
664
@property
def nodemap(self):
    """Deprecated accessor returning ``self.index.nodemap``.

    A deprecation warning is emitted through ``util.nouideprecwarn``
    before the value is returned.
    """
    util.nouideprecwarn(
        b"revlog.nodemap is deprecated, "
        b"use revlog.index.[has_node|rev|get_rev]",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap
673
673
@property
def _nodecache(self):
    """Deprecated accessor returning ``self.index.nodemap``.

    A deprecation warning is emitted through ``util.nouideprecwarn``
    before the value is returned.
    """
    util.nouideprecwarn(
        b"revlog._nodecache is deprecated, use revlog.index.nodemap",
        b'5.3',
        stacklevel=2,
    )
    return self.index.nodemap
679
679
def hasnode(self, node):
    """Tell whether ``self.rev(node)`` succeeds.

    Only ``KeyError`` is interpreted as "unknown node"; any other
    exception raised by ``self.rev`` propagates.
    """
    try:
        self.rev(node)
    except KeyError:
        return False
    else:
        return True
686
686
def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Delta is disabled as soon as one of the revisions requires a
    # content-changing flag processor (ex. LFS). Such a flag processor can
    # alter the rawtext content that the delta will be based on, and two
    # clients could have a same revlog node with different flags (i.e.
    # different rawtext contents), making the delta incompatible.
    for candidate in (baserev, rev):
        if self.flags(candidate) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
    return True
699
699
def update_caches(self, transaction):
    """Refresh the persistent nodemap, when this revlog has one.

    Nothing happens when ``self._nodemap_file`` is None. Otherwise the
    update is performed directly when ``transaction`` is None, and is set
    up through the transaction when one is provided.
    """
    if self._nodemap_file is None:
        return
    if transaction is not None:
        nodemaputil.setup_persistent_nodemap(transaction, self)
    else:
        nodemaputil.update_persistent_nodemap(self)
706
706
def clearcaches(self):
    """Reset the in-memory caches of this revlog and of its index.

    When a persistent nodemap is usable (non-inline revlog, a nodemap file
    is configured and the index exposes ``update_nodemap_data``), the
    persisted data is reloaded and handed back to the index.
    """
    self._revisioncache = None
    self._chainbasecache.clear()
    self._chunkcache = (0, b'')
    self._pcache = {}
    self._nodemap_docket = None
    self.index.clearcaches()

    # The python code is the one responsible for validating the docket, so
    # we end up having to refresh it here.
    if self._inline or self._nodemap_file is None:
        return
    if not util.safehasattr(self.index, 'update_nodemap_data'):
        return
    persisted = nodemaputil.persisted_data(self)
    if persisted is None:
        return
    self._nodemap_docket = persisted[0]
    self.index.update_nodemap_data(*persisted)
726
726
def rev(self, node):
    """Return the revision number ``self.index.rev`` reports for ``node``.

    A ``TypeError`` raised by the index propagates unchanged. An
    ``error.RevlogError`` is converted: working directory identifiers
    raise ``error.WdirUnsupported``, any other node raises
    ``error.LookupError``.
    """
    try:
        found = self.index.rev(node)
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        constants = self.nodeconstants
        is_wdir = (
            node == constants.wdirid or node in constants.wdirfilenodeids
        )
        if is_wdir:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.display_id, _(b'no node'))
    return found
740
740
741 # Accessors for index entries.
741 # Accessors for index entries.
742
742
743 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
743 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
744 # are flags.
744 # are flags.
def start(self, rev):
    """Return the offset part (everything above the low 16 bits) of the
    first field of the index entry of ``rev``, as an ``int``."""
    packed = self.index[rev][0]
    return int(packed >> 16)
747
747
def flags(self, rev):
    """Return the flags part (low 16 bits) of the first field of the index
    entry of ``rev``."""
    packed = self.index[rev][0]
    return packed & 0xFFFF
750
750
def length(self, rev):
    """Return the second field of the index entry of ``rev``."""
    entry = self.index[rev]
    return entry[1]
753
753
def sidedata_length(self, rev):
    """Return field 9 of the index entry of ``rev``.

    Revlogs without sidedata (``self.hassidedata`` false) always report 0.
    """
    if self.hassidedata:
        return self.index[rev][9]
    return 0
758
758
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    recorded = self.index[rev][2]
    if recorded < 0:
        # no usable size in the index: measure the raw data itself
        recorded = len(self.rawdata(rev))
    return recorded
767
767
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    revflags = self.flags(rev)
    # ELLIPSIS is known to not change the content, so it is left out of
    # the set of flags that force the slow path.
    contentflags = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
    if revflags & contentflags:
        return len(self.revision(rev, raw=False))
    # fast path: no "read" flag processor could change the content, so the
    # size is the raw size.
    return self.rawsize(rev)
777
777
def chainbase(self, rev):
    """Return the revision found by following field 3 of the index entries
    from ``rev`` until an entry points to itself.

    Results are memoized in ``self._chainbasecache``.
    """
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    index = self.index
    current = rev
    while True:
        base = index[current][3]
        if base == current:
            break
        current = base

    self._chainbasecache[rev] = base
    return base
792
792
def linkrev(self, rev):
    """Return field 4 of the index entry of ``rev``."""
    entry = self.index[rev]
    return entry[4]
795
795
def parentrevs(self, rev):
    """Return the two parent revisions (fields 5 and 6) of ``rev``.

    When the first parent is ``nullrev`` the pair is returned swapped, so
    a null parent comes second. ``error.WdirUnsupported`` is raised when
    ``rev`` is ``wdirrev`` and the index has no such entry; any other
    ``IndexError`` propagates.
    """
    try:
        entry = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    first, second = entry[5], entry[6]
    if first == nullrev:
        return second, first
    return first, second

# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs
810
810
def node(self, rev):
    """Return field 7 (the node) of the index entry of ``rev``.

    ``error.WdirUnsupported`` is raised when the lookup fails with
    ``IndexError`` and ``rev`` is ``wdirrev``; any other ``IndexError``
    propagates.
    """
    try:
        entry = self.index[rev]
        return entry[7]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
818
818
819 # Derived from index values.
819 # Derived from index values.
820
820
def end(self, rev):
    """Return ``self.start(rev) + self.length(rev)``."""
    offset = self.start(rev)
    return offset + self.length(rev)
823
823
def parents(self, node):
    """Return the nodes of the two parents of ``node``.

    The parents are read from fields 5 and 6 of the index entry of
    ``node`` and translated to nodes through field 7 of the parents' own
    entries. When the first parent is the null node, the pair is returned
    swapped so that a null parent always comes second, which is the same
    ordering ``parentrevs`` applies on revision numbers.

    Lookup failures raised by ``self.rev(node)`` propagate to the caller.
    """
    i = self.index
    d = i[self.rev(node)]
    # inline node() to avoid function call overhead
    p1node = i[d[5]][7]
    p2node = i[d[6]][7]
    # d[5] is a revision number, so it can never be equal to a node id:
    # the null check has to be done on the node of the first parent.
    if p1node == self.nullid:
        return p2node, p1node
    return p1node, p2node
832
832
def chainlen(self, rev):
    """Return the first item of ``self._chaininfo(rev)``: the chain length."""
    info = self._chaininfo(rev)
    return info[0]
835
835
836 def _chaininfo(self, rev):
836 def _chaininfo(self, rev):
837 chaininfocache = self._chaininfocache
837 chaininfocache = self._chaininfocache
838 if rev in chaininfocache:
838 if rev in chaininfocache:
839 return chaininfocache[rev]
839 return chaininfocache[rev]
840 index = self.index
840 index = self.index
841 generaldelta = self._generaldelta
841 generaldelta = self._generaldelta
842 iterrev = rev
842 iterrev = rev
843 e = index[iterrev]
843 e = index[iterrev]
844 clen = 0
844 clen = 0
845 compresseddeltalen = 0
845 compresseddeltalen = 0
846 while iterrev != e[3]:
846 while iterrev != e[3]:
847 clen += 1
847 clen += 1
848 compresseddeltalen += e[1]
848 compresseddeltalen += e[1]
849 if generaldelta:
849 if generaldelta:
850 iterrev = e[3]
850 iterrev = e[3]
851 else:
851 else:
852 iterrev -= 1
852 iterrev -= 1
853 if iterrev in chaininfocache:
853 if iterrev in chaininfocache:
854 t = chaininfocache[iterrev]
854 t = chaininfocache[iterrev]
855 clen += t[0]
855 clen += t[0]
856 compresseddeltalen += t[1]
856 compresseddeltalen += t[1]
857 break
857 break
858 e = index[iterrev]
858 e = index[iterrev]
859 else:
859 else:
860 # Add text length of base since decompressing that also takes
860 # Add text length of base since decompressing that also takes
861 # work. For cache hits the length is already included.
861 # work. For cache hits the length is already included.
862 compresseddeltalen += e[1]
862 compresseddeltalen += e[1]
863 r = (clen, compresseddeltalen)
863 r = (clen, compresseddeltalen)
864 chaininfocache[rev] = r
864 chaininfocache[rev] = r
865 return r
865 return r
866
866
867 def _deltachain(self, rev, stoprev=None):
867 def _deltachain(self, rev, stoprev=None):
868 """Obtain the delta chain for a revision.
868 """Obtain the delta chain for a revision.
869
869
870 ``stoprev`` specifies a revision to stop at. If not specified, we
870 ``stoprev`` specifies a revision to stop at. If not specified, we
871 stop at the base of the chain.
871 stop at the base of the chain.
872
872
873 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
873 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
874 revs in ascending order and ``stopped`` is a bool indicating whether
874 revs in ascending order and ``stopped`` is a bool indicating whether
875 ``stoprev`` was hit.
875 ``stoprev`` was hit.
876 """
876 """
877 # Try C implementation.
877 # Try C implementation.
878 try:
878 try:
879 return self.index.deltachain(rev, stoprev, self._generaldelta)
879 return self.index.deltachain(rev, stoprev, self._generaldelta)
880 except AttributeError:
880 except AttributeError:
881 pass
881 pass
882
882
883 chain = []
883 chain = []
884
884
885 # Alias to prevent attribute lookup in tight loop.
885 # Alias to prevent attribute lookup in tight loop.
886 index = self.index
886 index = self.index
887 generaldelta = self._generaldelta
887 generaldelta = self._generaldelta
888
888
889 iterrev = rev
889 iterrev = rev
890 e = index[iterrev]
890 e = index[iterrev]
891 while iterrev != e[3] and iterrev != stoprev:
891 while iterrev != e[3] and iterrev != stoprev:
892 chain.append(iterrev)
892 chain.append(iterrev)
893 if generaldelta:
893 if generaldelta:
894 iterrev = e[3]
894 iterrev = e[3]
895 else:
895 else:
896 iterrev -= 1
896 iterrev -= 1
897 e = index[iterrev]
897 e = index[iterrev]
898
898
899 if iterrev == stoprev:
899 if iterrev == stoprev:
900 stopped = True
900 stopped = True
901 else:
901 else:
902 chain.append(iterrev)
902 chain.append(iterrev)
903 stopped = False
903 stopped = False
904
904
905 chain.reverse()
905 chain.reverse()
906 return chain, stopped
906 return chain, stopped
907
907
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    revs = list(revs)
    for r in revs:
        self.node(r)
    # and we're sure ancestors aren't filtered as well

    if rustancestor is None:
        return ancestor.lazyancestors(
            self._uncheckedparentrevs,
            revs,
            stoprev=stoprev,
            inclusive=inclusive,
        )
    return rustancestor.LazyAncestors(
        self.index, revs, stoprev=stoprev, inclusive=inclusive
    )
928
928
def descendants(self, revs):
    """Return the result of ``dagop.descendantrevs`` for ``revs``, using
    this revlog's ``revs`` and ``parentrevs`` methods."""
    revsfn = self.revs
    parentsfn = self.parentrevs
    return dagop.descendantrevs(revs, revsfn, parentsfn)
931
931
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        """Set-like view combining eagerly added values with a lazy
        collection that is only consulted when needed."""

        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            if value in self.addedvalues:
                return True
            return value in self.lazyvalues

        def __iter__(self):
            eager = self.addedvalues
            for value in eager:
                yield value
            for value in self.lazyvalues:
                if value not in eager:
                    yield value

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(commonrevs))
    has.add(nullrev)
    has.update(commonrevs)

    # take all ancestors from heads that aren't in has
    missing = set()
    pending = collections.deque(r for r in headrevs if r not in has)
    while pending:
        r = pending.popleft()
        if r in missing:
            continue
        missing.add(r)
        pending.extend(p for p in self.parentrevs(r) if p not in has)
    return has, [self.node(r) for r in sorted(missing)]
995
995
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common.

    The object is a ``rustancestor.MissingAncestors`` when ``rustancestor``
    is available, an ``ancestor.incrementalmissingancestors`` otherwise.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    if rustancestor is None:
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
    return rustancestor.MissingAncestors(self.index, common)
1011
1011
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullrev."""
    commonrevs = [nullrev] if common is None else common
    headrevs = self.headrevs() if heads is None else heads

    tracker = self.incrementalmissingrevs(common=commonrevs)
    return tracker.missingancestors(headrevs)
1035
1035
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [self.nullid]
    if heads is None:
        heads = self.heads()

    # the computation itself works on revision numbers
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    tracker = self.incrementalmissingrevs(common=commonrevs)
    missingrevs = tracker.missingancestors(headrevs)
    return [self.node(r) for r in missingrevs]
1061
1061
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    The result is a tuple (nodes, outroots, outheads).  'nodes' is a
    topologically sorted list of every node N such that:

    1. N is a descendant of some node in 'roots'
    2. N is an ancestor of some node in 'heads'

    A node counts as both a descendant and an ancestor of itself, so
    every reachable member of 'roots' and 'heads' shows up in 'nodes'.

    'outroots' is the subset of 'roots' that ended up in 'nodes';
    'outheads' is the subset of 'heads' that ended up in 'nodes'.

    'roots' and 'heads' are iterables of node IDs.  When 'roots' is
    omitted, nullid is the only root.  When 'heads' is omitted, all of
    the revlog's heads are used.  An empty 'roots' or 'heads' yields
    ([], [], [])."""
    empty = ([], [], [])

    if roots is None:
        # Everybody's a descendant of nullid
        roots = [self.nullid]
        lowestrev = nullrev
    else:
        roots = list(roots)
        if not roots:
            return empty
        lowestrev = min(self.rev(node) for node in roots)

    if heads is None and lowestrev == nullrev:
        # We want _all_ the nodes!
        return (
            [self.node(rev) for rev in self],
            [self.nullid],
            list(self.heads()),
        )

    if heads is None:
        # Every node is an ancestor, so the last node is the upper bound.
        highestrev = len(self) - 1
        # None signals "every node is an ancestor".
        ancestors = None
        # Heads get discovered while walking the descendants below.
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return empty
        ancestors = set()
        # Use a dict so that 'fake' heads can be dropped, and so that the
        # heads actually reached from the roots can be flagged later on.
        heads = dict.fromkeys(heads, False)
        # Walk from the top, marking parents until nothing is left.
        pending = set(heads)
        # Remember where the top was; it bounds the forward walk below.
        highestrev = max(self.rev(node) for node in pending)
        while pending:
            node = pending.pop()
            # Never tag nullid
            if node == self.nullid:
                continue
            # Revision numbers are topologically ordered, so anything
            # below the lowest root cannot descend from a root.
            if self.rev(node) < lowestrev:
                continue
            if node not in ancestors:
                # First visit: record it and queue its non-null parents.
                ancestors.add(node)
                pending.update(
                    parent
                    for parent in self.parents(node)
                    if parent != self.nullid
                )
            elif node in heads:
                # Seen before and listed as a head: it is an ancestor of
                # another head, so it is not a real head.
                heads.pop(node)
        if not ancestors:
            return empty

        # With the ancestor set known, drop roots that are not ancestors.
        if lowestrev > nullrev:
            roots = [root for root in roots if root in ancestors]
            if not roots:
                # No more roots?  Return empty list
                return empty
            # Recompute the lowest revision from the surviving roots.
            lowestrev = min(self.rev(root) for root in roots)
        else:
            # Descending from nullid: no other root matters.
            lowestrev = nullrev
            roots = [self.nullid]

    # Seed the descendant set with the roots.
    descendants = set(roots)
    # Keep a separate copy of the roots so that roots descended from
    # other roots (not 'real' roots) can be removed.
    roots = descendants.copy()
    # Topologically sorted output nodes.
    orderedout = []
    # Start at 0 at the lowest: nullid must not be in the output, and
    # having it in 'descendants' would make empty parents look like
    # descendants.
    for rev in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        node = self.node(rev)
        if lowestrev == nullrev:
            # Everybody is a descendant of nullid
            isdescendant = True
        elif node in descendants:
            isdescendant = True
            # Only roots are in 'descendants' before being visited, so
            # this is the only place the 'real root' check is needed.
            if node in roots:
                parents = tuple(self.parents(node))
                # A root with a descendant parent is not a real root.
                if parents[0] in descendants or parents[1] in descendants:
                    roots.remove(node)
        else:
            parents = tuple(self.parents(node))
            # A node is a descendant if either of its parents is one.
            isdescendant = (
                parents[0] in descendants or parents[1] in descendants
            )
            if isdescendant:
                descendants.add(node)

        if not isdescendant:
            continue
        if ancestors is None:
            # Head discovery mode: every descendant is part of the
            # output.  Assume it is a head; a later child will evict it.
            orderedout.append(node)
            heads[node] = True
            # Its parents, obviously, are not heads.
            for parent in self.parents(node):
                heads.pop(parent, None)
        elif node in ancestors:
            # Only nodes that are both descendants and ancestors count.
            orderedout.append(node)
            if node in heads:
                # Mark this head as having been reached from the roots.
                heads[node] = True

    heads = [head for head, reached in pycompat.iteritems(heads) if reached]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)
1221
1221
def headrevs(self, revs=None):
    """Return head revisions, optionally restricted to 'revs'.

    With 'revs' omitted, the index is asked through its headrevs()
    method; if that raises AttributeError, self._headrevs() is used
    instead.

    With 'revs' given, the computation goes to rustdagop.headrevs when
    rustdagop is available, and to dagop.headrevs otherwise."""
    if revs is not None:
        if rustdagop is None:
            return dagop.headrevs(revs, self._uncheckedparentrevs)
        return rustdagop.headrevs(self.index, revs)
    try:
        return self.index.headrevs()
    except AttributeError:
        return self._headrevs()
1231
1231
def computephases(self, roots):
    """Hand 'roots' to the index's computephasesmapsets() and return
    whatever it produces."""
    index = self.index
    return index.computephasesmapsets(roots)
1234
1234
def _headrevs(self):
    """Compute the head revisions by scanning the index.

    Returns [nullrev] when the revlog is empty.  Otherwise returns, in
    increasing order, every revision yielded by iterating over self
    that is not recorded as a parent (entry fields 5 and 6) of a later
    revision yielded by that iteration."""
    total = len(self)
    if total == 0:
        return [nullrev]
    # Nothing is a head until it has been visited; revisions skipped by
    # the iteration therefore never become heads.
    # NOTE(review): the extra trailing slot presumably absorbs writes for
    # a null parent (nullrev, assumed to be -1) -- confirm.
    candidate = [False] * (total + 1)
    entries = self.index
    for rev in self:
        # This revision may be a head...
        candidate[rev] = True
        entry = entries[rev]
        # ...but its parents are not.
        candidate[entry[5]] = False
        candidate[entry[6]] = False
    return [rev for rev, flagged in enumerate(candidate) if flagged]
1247
1247
def heads(self, start=None, stop=None):
    """Return the list of all nodes that have no children.

    If 'start' is given, only heads that are descendants of 'start'
    are returned.

    If 'stop' is given, every revision listed in it is treated as if
    it had no children.

    With neither argument, an empty revlog yields [nullid].
    """
    if start is None and stop is None:
        if len(self):
            return [self.node(rev) for rev in self.headrevs()]
        return [self.nullid]

    startrev = nullrev if start is None else self.rev(start)

    stoprevs = set()
    for node in stop or []:
        stoprevs.add(self.rev(node))

    subset = dagop.headrevssubset(
        self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
    )
    return [self.node(rev) for rev in subset]
1273
1273
def children(self, node):
    """Return the nodes of all revisions that have 'node' as a parent.

    Only revisions after 'node' are examined.  When 'node' resolves to
    nullrev, revisions whose parents are all null are reported."""
    found = []
    target = self.rev(node)
    for rev in self.revs(start=target + 1):
        realparents = [
            parent for parent in self.parentrevs(rev) if parent != nullrev
        ]
        if not realparents:
            # A revision without real parents is a child of null only.
            if target == nullrev:
                found.append(self.node(rev))
            continue
        for parent in realparents:
            if parent == target:
                found.append(self.node(rev))
    return found
1287
1287
def commonancestorsheads(self, a, b):
    """Calculate all the heads of the common ancestors of nodes a and b.

    Both nodes are converted to revisions, the work is done by
    self._commonancestorsheads(), and the resulting revisions are
    mapped back to nodes."""
    reva = self.rev(a)
    revb = self.rev(b)
    headrevs = self._commonancestorsheads(reva, revb)
    return pycompat.maplist(self.node, headrevs)
1293
1293
def _commonancestorsheads(self, *revs):
    """Calculate all the heads of the common ancestors of 'revs'.

    The index implementation is tried first; when it raises
    AttributeError or OverflowError, ancestor.commonancestorsheads()
    is used with self.parentrevs instead."""
    try:
        return self.index.commonancestorsheads(*revs)
    except (AttributeError, OverflowError):
        # C implementation failed
        return ancestor.commonancestorsheads(self.parentrevs, *revs)
1301
1301
def isancestor(self, a, b):
    """Return True if node a is an ancestor of node b.

    Both nodes are resolved to revisions and the question is passed on
    to isancestorrev().  A revision is considered an ancestor of
    itself."""
    reva = self.rev(a)
    revb = self.rev(b)
    return self.isancestorrev(reva, revb)
1308
1308
def isancestorrev(self, a, b):
    """Return True if revision a is an ancestor of revision b.

    A revision is considered an ancestor of itself, and nullrev is an
    ancestor of everything.  A revision greater than b is never an
    ancestor of b.

    The implementation of this is trivial but the use of
    reachableroots is not."""
    if a == nullrev or a == b:
        return True
    if a > b:
        return False
    reached = self.reachableroots(a, [b], [a], includepath=False)
    return bool(reached)
1323
1323
def reachableroots(self, minroot, heads, roots, includepath=False):
    """Return (heads(::(<roots> and <roots>::<heads>))).

    If includepath is True, return (<roots>::<heads>).

    The index's reachableroots2() is preferred; when that raises
    AttributeError, dagop._reachablerootspure() is used instead."""
    try:
        reached = self.index.reachableroots2(
            minroot, heads, roots, includepath
        )
    except AttributeError:
        reached = dagop._reachablerootspure(
            self.parentrevs, minroot, roots, heads, includepath
        )
    return reached
1336
1336
def ancestor(self, a, b):
    """Calculate the "best" common ancestor of nodes a and b.

    Candidates come from the index's ancestors() method, or from
    ancestor.ancestors() when the index raises AttributeError or
    OverflowError.  When there is no candidate, nullid is returned;
    when there are several, the smallest node wins so that the answer
    is consistent."""
    reva, revb = self.rev(a), self.rev(b)
    try:
        candidates = self.index.ancestors(reva, revb)
    except (AttributeError, OverflowError):
        candidates = ancestor.ancestors(self.parentrevs, reva, revb)
    if not candidates:
        return self.nullid
    # choose a consistent winner when there's a tie
    return min(self.node(rev) for rev in candidates)
1349
1349
def _match(self, id):
    """Resolve 'id' to a node when it identifies a revision exactly.

    The following interpretations are tried in order:

    - an int: taken as a revision number
    - a value whose length is the node length: taken as a binary node
    - the decimal text of a revision number (negative values count
      from the end)
    - a value twice the node length: taken as a full hex node

    Returns the node, or None when no interpretation matches."""
    if isinstance(id, int):
        # rev
        return self.node(id)

    size = len(id)
    if size == self.nodeconstants.nodelen:
        # possibly a binary node
        # odds of a binary node being all hex in ASCII are 1 in 10**25
        try:
            self.rev(id)  # quick search the index
            return id
        except error.LookupError:
            pass  # may be partial hex id

    try:
        # str(rev)
        rev = int(id)
        # Reject spellings that do not round-trip through b"%d".
        if b"%d" % rev != id:
            raise ValueError
        if rev < 0:
            rev += len(self)
        if not 0 <= rev < len(self):
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass

    if size == 2 * self.nodeconstants.nodelen:
        try:
            # a full hex nodeid?
            binnode = bin(id)
            self.rev(binnode)
            return binnode
        except (TypeError, error.LookupError):
            pass
    return None
1383
1383
def _partialmatch(self, id):
    """Resolve the hex prefix 'id' to a node.

    The index's partialmatch() is tried first.  If that is unavailable,
    rejects the key, or reports an ambiguity while revisions are
    filtered, the slow path scans every index entry.

    Returns the matching node, or None when nothing matches.

    Raises error.AmbiguousPrefixLookupError when the prefix matches more
    than one candidate, and error.WdirUnsupported when the only thing
    the prefix can designate is the working directory id."""
    # we don't care wdirfilenodeids as they should be always full hash
    wdirpossible = self.nodeconstants.wdirhex.startswith(id)
    try:
        found = self.index.partialmatch(id)
        if found and self.hasnode(found):
            if wdirpossible:
                # single 'ff...' match in radix tree, ambiguous with wdir
                raise error.RevlogError
            return found
        if wdirpossible:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        return None
    except error.RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )
        # otherwise continue with the slow path, which filters hidden
        # revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass

    if id in self._pcache:
        return self._pcache[id]

    if len(id) > 40:
        return None
    try:
        # hex(node)[:...]; only an even number of digits can be decoded
        evenlen = (len(id) // 2) * 2
        prefix = bin(id[:evenlen])
        candidates = [e[7] for e in self.index if e[7].startswith(prefix)]
        matches = [
            n for n in candidates if hex(n).startswith(id) and self.hasnode(n)
        ]
        if self.nodeconstants.nullhex.startswith(id):
            matches.append(self.nullid)
        if matches:
            if len(matches) == 1 and not wdirpossible:
                self._pcache[id] = matches[0]
                return matches[0]
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )
        if wdirpossible:
            raise error.WdirUnsupported
        return None
    except TypeError:
        return None
1436
1436
def lookup(self, id):
    """Locate a node based on:
    - revision number or str(revision number)
    - nodeid or subset of hex nodeid

    An exact match (self._match) takes precedence over a prefix match
    (self._partialmatch).  Raises error.LookupError when neither
    produces a node.
    """
    exact = self._match(id)
    if exact is not None:
        return exact
    partial = self._partialmatch(id)
    if not partial:
        raise error.LookupError(id, self.display_id, _(b'no match found'))
    return partial
1450
1450
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node.

    'minlength' is the smallest prefix length that may be returned.
    Prefixes made only of 'f' digits are lengthened so that they are
    not confused with the working directory id.

    Raises error.LookupError when 'node' cannot be found."""
    hexnode = hex(node)

    def allf(prefix):
        # True when the prefix could also designate the wdir id.
        for char in pycompat.iterbytestr(prefix):
            if char != b'f':
                return False
        return True

    def resolves(prefix):
        # True when the prefix designates a single target.
        try:
            matched = self._partialmatch(prefix)
        except error.AmbiguousPrefixLookupError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if matched is None:
            raise error.LookupError(node, self.display_id, _(b'no node'))
        return True

    def notwdir(start):
        # Disambiguate against wdirid: first prefix of at least 'start'
        # digits that is not made only of 'f'.
        for size in range(start, len(hexnode) + 1):
            candidate = hexnode[:size]
            if not allf(candidate):
                return candidate
        return None

    if not getattr(self, 'filteredrevs', None):
        try:
            size = max(self.index.shortest(node), minlength)
            return notwdir(size)
        except error.RevlogError:
            if node != self.nodeconstants.wdirid:
                raise error.LookupError(
                    node, self.display_id, _(b'no node')
                )
        except AttributeError:
            # Fall through to pure code
            pass

    if node == self.nodeconstants.wdirid:
        for size in range(minlength, len(hexnode) + 1):
            candidate = hexnode[:size]
            if resolves(candidate):
                return candidate

    for size in range(minlength, len(hexnode) + 1):
        if resolves(hexnode[:size]):
            return notwdir(size)
1501
1501
def cmp(self, node, text):
    """Compare text with a given file revision.

    The hash of 'text' combined with the parents of 'node' is computed
    and checked against 'node'.

    Returns True if text is different than what is stored.
    """
    p1, p2 = self.parents(node)
    expected = storageutil.hashrevisionsha1(text, p1, p2)
    return expected != node
1509
1509
def _cachesegment(self, offset, data):
    """Add a segment to the revlog cache.

    Accepts an absolute offset and the data that is at that location.

    The data is appended to the cached chunk when it starts exactly
    where the chunk ends and the combined size stays below _chunksize;
    otherwise it replaces the cached chunk.
    """
    cachedoffset, cacheddata = self._chunkcache
    contiguous = cachedoffset + len(cacheddata) == offset
    if contiguous and len(cacheddata) + len(data) < _chunksize:
        # extend the existing cache
        self._chunkcache = cachedoffset, cacheddata + data
    else:
        self._chunkcache = offset, data
1521
1521
def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, length to read, and an optional existing
    file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    The bytes actually read are handed to self._cachesegment().

    Returns a str or buffer of raw byte data.

    Raises error.RevlogError if the requested number of bytes could not
    be read.
    """
    # Read a window around the requested data, both forward and
    # backward, rounded with the chunk cache size.  This helps speed up
    # operations involving reading the revlog backwards.
    window = self._chunkcachesize
    mask = ~(window - 1)
    realoffset = offset & mask
    reallength = ((offset + length + window) & mask) - realoffset
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        chunk = df.read(reallength)

    self._cachesegment(realoffset, chunk)

    # 'exact' means the window coincides with the request, in which case
    # the raw bytes are returned as-is instead of a buffer slice.
    exact = offset == realoffset and reallength == length
    # Position of the requested data inside the window (0 when exact).
    startoffset = offset - realoffset
    available = len(chunk) - startoffset
    if available < length:
        raise error.RevlogError(
            _(
                b'partial read of revlog %s; expected %d bytes from '
                b'offset %d, got %d'
            )
            % (
                self._indexfile if self._inline else self._datafile,
                length,
                offset,
                available,
            )
        )

    if exact:
        return chunk
    return util.buffer(chunk, startoffset, length)
1581
1581
def _getsegment(self, offset, length, df=None):
    """Return a segment of raw revlog data, using the chunk cache if possible.

    ``offset`` is an absolute offset and ``length`` the number of bytes
    wanted. ``df`` is an optional handle to the already-opened revlog; if
    it ends up being used, its original seek position is not preserved.

    Returns a str or a buffer instance of raw byte data.
    """
    cacheoffset, cached = self._chunkcache
    cachedlen = len(cached)

    # Position of the request relative to the cached segment.
    begin = offset - cacheoffset
    stop = begin + length

    if begin < 0 or stop > cachedlen:
        # not (fully) covered by the cache: go to disk
        return self._readsegment(offset, length, df=df)

    if begin == 0 and stop == cachedlen:
        return cached  # exact match, avoid a copy
    return util.buffer(cached, begin, stop - begin)
1605
1605
def _getsegmentforrevs(self, startrev, endrev, df=None):
    """Return the raw data spanning a contiguous range of revisions.

    ``startrev`` and ``endrev`` delimit the range (both included). ``df``
    is an optional already-open file handle; if it is read from, its seek
    position is not preserved.

    The request may be served from a cache.

    Returns a 2-tuple ``(offset, data)`` where ``offset`` is the integer
    offset of the range from the beginning of the revlog and ``data`` is a
    str or buffer of the raw bytes. Callers still need ``self.start(rev)``
    and ``self.length(rev)`` to locate each revision inside ``data``.
    """
    # self.start(startrev) and self.end(endrev) are inlined here for
    # performance reasons (function calls are expensive).
    index = self.index
    first = index[startrev]
    last = first if startrev == endrev else index[endrev]

    # The low 16 bits of the first field are shifted away to get the
    # data offset; the second field is the stored length.
    start = int(first[0] >> 16)
    end = int(last[0] >> 16) + last[1]

    if self._inline:
        # inline revlogs interleave one index entry before each data chunk
        entry_size = self.index.entry_size
        start += (startrev + 1) * entry_size
        end += (endrev + 1) * entry_size

    return start, self._getsegment(start, end - start, df=df)
1639
1639
def _chunk(self, rev, df=None):
    """Return the decompressed chunk stored for a single revision.

    ``rev`` is an integer revision. ``df`` is an optional already-open
    file handle; if it is used, its seek position will not be preserved.

    Returns a str holding the uncompressed data of the revision.
    """
    segment = self._getsegmentforrevs(rev, rev, df=df)[1]
    return self.decompress(segment)
1650
1650
def _chunks(self, revs, df=None, targetsize=None):
    """Obtain decompressed chunks for the specified revisions.

    ``revs`` is a sequence of numeric revisions assumed to be in ascending
    order. ``df`` is an optional already-open file handle; if used, its
    seek position will not be preserved. ``targetsize`` is forwarded to
    ``deltautil.slicechunk`` when sparse reading is enabled.

    This is similar to calling ``self._chunk()`` for each revision, but
    faster because whole runs of revisions are fetched in one segment.

    Returns a list with the decompressed data of the revisions read.
    """
    if not revs:
        return []
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self.index.entry_size
    buffer = util.buffer
    decomp = self.decompress

    l = []
    ladd = l.append

    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = deltautil.slicechunk(
            self, revs, targetsize=targetsize
        )

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows. Read this slice one revision at a time and
            # keep going: returning here would throw away the chunks
            # already collected from earlier slices and never read the
            # following ones.
            for rev in revschunk:
                ladd(self._chunk(rev, df=df))
            continue

        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                # account for the index entries interleaved with the data
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

    return l
1705
1705
def _chunkclear(self):
    """Reset the raw chunk cache to an empty segment at offset 0."""
    self._chunkcache = 0, b''
1709
1709
def deltaparent(self, rev):
    """Return the revision that ``rev`` is stored as a delta against.

    Returns ``nullrev`` when the index entry is its own base. Otherwise
    returns the recorded base for general-delta revlogs, and the previous
    revision for the others.
    """
    base = self.index[rev][3]
    if base == rev:
        return nullrev
    if self._generaldelta:
        return base
    return rev - 1
1719
1719
def issnapshot(self, rev):
    """Tell whether ``rev`` is a snapshot."""
    if not self._sparserevlog:
        return self.deltaparent(rev) == nullrev
    if util.safehasattr(self.index, b'issnapshot'):
        # Bind the index method on the instance so that later calls skip
        # this test and go straight to the index implementation.
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)

    if rev == nullrev:
        return True
    entry = self.index[rev]
    base = entry[3]
    if base == rev or base == nullrev:
        return True
    p1, p2 = entry[5], entry[6]
    if base in (p1, p2):
        # a delta against one of the parents is not a snapshot
        return False
    # otherwise it is a snapshot only if its base is one too
    return self.issnapshot(base)
1741
1741
def snapshotdepth(self, rev):
    """Return the number of snapshots in the chain before ``rev``.

    Raises ``error.ProgrammingError`` if ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # interpolate the revision: the message used to be emitted with a
        # literal, unformatted ``%d``
        raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
    return len(self._deltachain(rev)[0]) - 1
1747
1747
def revdiff(self, rev1, rev2):
    """Return or calculate a delta between two revisions.

    The delta is in binary form and intended to be written to revlog data
    directly, so it is computed from raw revision data.
    """
    stored_delta_fits = rev1 != nullrev and self.deltaparent(rev2) == rev1
    if stored_delta_fits:
        # rev2 is already stored as a delta against rev1: reuse it
        return bytes(self._chunk(rev2))

    old = self.rawdata(rev1)
    new = self.rawdata(rev2)
    return mdiff.textdiff(old, new)
1758
1758
def _processflags(self, text, flags, operation, raw=False):
    """Deprecated entry point to access flag processors.

    Emits a deprecation warning, then dispatches to the specialized
    ``flagutil`` variant: the raw one when ``raw`` is set, the read one
    when ``operation`` is ``b'read'``, the write one otherwise.
    """
    util.nouideprecwarn(
        b'_processflag(...) use the specialized variant',
        b'5.2',
        stacklevel=2,
    )
    if raw:
        return text, flagutil.processflagsraw(self, text, flags)
    if operation == b'read':
        return flagutil.processflagsread(self, text, flags)
    # anything else is treated as a write operation
    return flagutil.processflagswrite(self, text, flags)
1769
1769
def revision(self, nodeorrev, _df=None, raw=False):
    """Return the uncompressed revision for a node or revision number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. Passing ``raw=True``
    is deprecated in favor of ``rawdata()`` and triggers a deprecation
    warning.
    """
    if raw:
        util.nouideprecwarn(
            b'revlog.revision(..., raw=True) is deprecated, '
            b'use revlog.rawdata(...)',
            b'5.2',
            stacklevel=2,
        )
    data = self._revisiondata(nodeorrev, _df, raw=raw)
    return data[0]
1786
1786
def sidedata(self, nodeorrev, _df=None):
    """Return the map of extra data attached to a revision.

    This data is related to the changeset but not part of the hash.

    A dictionary is currently returned. A more advanced mapping object
    will likely be used in the future for more efficient/lazy code.
    """
    data = self._revisiondata(nodeorrev, _df)
    return data[1]
1795
1795
def _revisiondata(self, nodeorrev, _df=None, raw=False):
    """Return ``(text, sidedata)`` for a node or revision number.

    ``_df`` is an optional existing file handle to read from. With
    ``raw`` set, the text is returned as stored (the raw flag processors
    only decide whether the hash must be checked); otherwise the read
    flag processors are applied to it.
    """
    # sort out whether we were handed a revision number or a node
    if isinstance(nodeorrev, int):
        rev, node = nodeorrev, self.node(nodeorrev)
    else:
        rev, node = None, nodeorrev

    # fast path for the special `nullid` rev
    if node == self.nullid:
        return b"", {}

    # ``rawtext`` is the text as stored inside the revlog. It might be the
    # revision itself or might need processing to retrieve the revision.
    rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

    if not self.hassidedata:
        sidedata = {}
    else:
        if rev is None:
            rev = self.rev(node)
        sidedata = self._sidedata(rev)

    if raw and validated:
        # raw text wanted and already validated: nothing more to do
        return rawtext, sidedata

    if rev is None:
        rev = self.rev(node)
    # the revlog's flags for this revision
    # (they usually alter its state or content)
    flags = self.flags(rev)

    if validated and flags == REVIDX_DEFAULT_FLAGS:
        # no extra flags set, no flag processor runs, text = rawtext
        return rawtext, sidedata

    if raw:
        validatehash = flagutil.processflagsraw(self, rawtext, flags)
        text = rawtext
    else:
        text, validatehash = flagutil.processflagsread(self, rawtext, flags)

    if validatehash:
        self.checkhash(text, node, rev=rev)
    if not validated:
        # remember the raw text now that it went through validation
        self._revisioncache = (node, rev, rawtext)

    return text, sidedata
1846
1846
def _rawtext(self, node, rev, _df=None):
    """Return the possibly unvalidated rawtext for a revision.

    Returns ``(rev, rawtext, validated)``. ``validated`` is True only when
    the text comes straight from the revision cache; in that case ``rev``
    is returned exactly as it was passed in (possibly None).
    """
    # The cache entry looks like (node, rev, rawtext). Even when it holds
    # another revision it may serve as a base to apply deltas on.
    stoprev = None
    if self._revisioncache:
        if self._revisioncache[0] == node:
            return (rev, self._revisioncache[2], True)
        stoprev = self._revisioncache[1]

    if rev is None:
        rev = self.rev(node)

    chain, stopped = self._deltachain(rev, stoprev=stoprev)
    # intermediate text to apply deltas to, when the chain stopped on the
    # cached revision
    basetext = self._revisioncache[2] if stopped else None

    # drop cache to save memory, the caller is expected to
    # update self._revisioncache after validating the text
    self._revisioncache = None

    rawsize = self.index[rev][2]
    targetsize = 4 * rawsize if 0 <= rawsize else None

    bins = self._chunks(chain, df=_df, targetsize=targetsize)
    if basetext is None:
        # no cached base: the first chunk of the chain is the base text
        basetext = bytes(bins[0])
        bins = bins[1:]

    rawtext = mdiff.patches(basetext, bins)
    del basetext  # let us have a chance to free memory early
    return (rev, rawtext, False)
1889
1889
def _sidedata(self, rev):
    """Return the sidedata for a given revision number.

    Returns an empty dict when the index entry records no sidedata.
    """
    entry = self.index[rev]
    offset, size = entry[8], entry[9]

    if self._inline:
        # skip the index entries interleaved with the data
        offset += self.index.entry_size * (1 + rev)
    if size == 0:
        return {}

    raw = self._getsegment(offset, size)
    return sidedatautil.deserialize_sidedata(raw)
1904
1904
def rawdata(self, nodeorrev, _df=None):
    """Return the uncompressed raw data of a node or revision number.

    _df - an existing file handle to read from. (internal-only)
    """
    data = self._revisiondata(nodeorrev, _df, raw=True)
    return data[0]
1911
1911
def hash(self, text, p1, p2):
    """Compute the node hash of ``text`` with parents ``p1`` and ``p2``.

    Kept as a method so that subclasses can replace the hash as needed.
    """
    node = storageutil.hashrevisionsha1(text, p1, p2)
    return node
1919
1919
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check node hash integrity.

    Kept as a method so that subclasses can extend hash mismatch
    behaviors as needed.

    The parents are looked up from ``node`` when neither ``p1`` nor ``p2``
    is given. Raises ``error.RevlogError`` on mismatch, or
    ``error.CensoredNodeError`` when the revlog is censorable and ``text``
    is a censored text.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node == self.hash(text, p1, p2):
            return

        # Clear the revision cache on hash failure. The revision cache
        # only stores the raw revision and clearing the cache does have
        # the side-effect that we won't have a cache hit when the raw
        # revision data is accessed. But this case should be rare and
        # it is extra work to teach the cache about the hash
        # verification state.
        cached = self._revisioncache
        if cached and cached[0] == node:
            self._revisioncache = None

        # identify the revision by number when known, short hash otherwise
        if rev is None:
            revornode = templatefilters.short(hex(node))
        else:
            revornode = rev
        raise error.RevlogError(
            _(b"integrity check failed on %s:%s")
            % (self.display_id, pycompat.bytestr(revornode))
        )
    except error.RevlogError:
        censored = self._censorable and storageutil.iscensoredtext(text)
        if censored:
            raise error.CensoredNodeError(self.display_id, node, text)
        raise
1950
1950
def _enforceinlinesize(self, tr, fp=None):
    """Check if the revlog is too big for inline and convert if so.

    This should be called after revisions are added to the revlog. If the
    revlog has grown too large to be an inline revlog, it will convert it
    to use multiple index and data files.

    ``tr`` is the transaction the index file is registered in. ``fp`` is
    an optional open handle on the index; it is flushed and closed before
    the conversion.

    Raises ``error.RevlogError`` if the index file is not part of ``tr``.
    """
    if not self._inline:
        # nothing to split; do not touch the index to size the tip
        return
    tiprev = len(self) - 1
    total_size = self.start(tiprev) + self.length(tiprev)
    if total_size < _maxinline:
        return

    troffset = tr.findoffset(self._indexfile)
    if troffset is None:
        raise error.RevlogError(
            _(b"%s not found in the transaction") % self._indexfile
        )
    trindex = None
    tr.add(self._datafile, 0)

    if fp:
        fp.flush()
        fp.close()
        # We can't use the cached file handle after close(). So prevent
        # its usage.
        self._writinghandles = None

    entry_size = self.index.entry_size
    with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
        for r in self:
            dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
            # In the inline file the index entry of revision ``r`` sits
            # at ``start(r) + r * entry_size`` (its data follows one
            # entry later). The first revision whose entry is at or past
            # the offset recorded in the transaction is the first one the
            # transaction added: only the entries before it must be kept
            # on rollback. Keep the FIRST match, later revisions match
            # too.
            if trindex is None and troffset <= self.start(r) + r * entry_size:
                trindex = r
    if trindex is None:
        # no revision at or past the recorded offset
        trindex = 0

    with self._indexfp(b'w') as fp:
        self._format_flags &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self.index.entry_binary(i)
            if i == 0:
                # the format header goes in front of the first entry
                header = self._format_flags | self._format_version
                header = self.index.pack_header(header)
                e = header + e
            fp.write(e)

        # the temp file replace the real index when we exit the context
        # manager

    tr.replace(self._indexfile, trindex * entry_size)
    nodemaputil.setup_persistent_nodemap(tr, self)
    self._chunkclear()
2003
2001
def _nodeduplicatecallback(self, transaction, node):
    """Hook called when trying to add a node that is already stored.

    Does nothing here.
    """
2006
2004
def addrevision(
    self,
    text,
    transaction,
    link,
    p1,
    p2,
    cachedelta=None,
    node=None,
    flags=REVIDX_DEFAULT_FLAGS,
    deltacomputer=None,
    sidedata=None,
):
    """Store a new revision in the log and return its revision number.

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta; it is dropped when the
        flag processors change the revision data
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    sidedata - an optional mapping of sidedata; ``None`` is treated as an
        empty mapping, and a non-empty one is rejected when this revlog
        has no sidedata support

    If the node is already present in the index, the existing revision
    number is returned and nothing is written.
    """
    if link == nullrev:
        raise error.RevlogError(
            _(b"attempted to add linkrev -1 to %s") % self.display_id
        )

    if sidedata is not None:
        if sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog who don't support them")
            )
    else:
        sidedata = {}

    # With flags set, the node must be derived from the non-raw text before
    # the flag processors get a chance to transform it.
    if flags and not node:
        node = self.hash(text, p1, p2)

    rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

    # A precomputed delta describes ``text``; it is useless once the flag
    # processors produced something different.
    if rawtext != text:
        cachedelta = None

    rawsize = len(rawtext)
    if rawsize > _maxentrysize:
        raise error.RevlogError(
            _(b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.display_id, rawsize)
        )

    if not node:
        node = self.hash(rawtext, p1, p2)

    # Adding an already stored node is a no-op.
    known_rev = self.index.get_rev(node)
    if known_rev is not None:
        return known_rev

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=cachedelta,
        deltacomputer=deltacomputer,
        sidedata=sidedata,
    )
2084
2082
def addrawrevision(
    self,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    node,
    flags,
    cachedelta=None,
    deltacomputer=None,
    sidedata=None,
):
    """add a raw revision with known flags, node and parents
    useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).

    The index file (and the data file, for a non-inline revlog) is opened
    in ``a+`` mode for the duration of the call and closed again before
    returning, whether or not the write succeeds.

    Returns whatever ``_addrevision`` returns (the new revision number).
    """
    dfh = None
    ifh = None
    try:
        # Open the handles inside the ``try`` so that a failure while
        # opening the index does not leak an already opened data handle.
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        return self._addrevision(
            node,
            rawtext,
            transaction,
            link,
            p1,
            p2,
            flags,
            cachedelta,
            ifh,
            dfh,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
    finally:
        try:
            if dfh is not None:
                dfh.close()
        finally:
            # Close the index handle even if closing the data handle failed.
            if ifh is not None:
                ifh.close()
2125
2123
def compress(self, data):
    """Return a ``(header, payload)`` pair holding a storable form of data.

    The header is empty when the payload is self-describing: empty input,
    output produced by the revlog compressor (which embeds its own header),
    or raw data starting with a NUL byte. Raw data stored uncompressed
    otherwise gets the ``b'u'`` header.
    """
    if data:
        packed = self._compressor.compress(data)
        if packed:
            # The revlog compressor added the header in the returned data.
            return b'', packed

        # The compressor declined; store the data raw, marking it with 'u'
        # unless its leading NUL byte already identifies it as raw.
        if data[0:1] != b'\0':
            return b'u', data

    return b'', data
2140
2138
def decompress(self, data):
    """Return the decompressed content of a revlog chunk.

    The first byte of the chunk identifies how it was stored and selects
    the decompressor to use. Empty chunks are returned unchanged.

    Raises ``error.RevlogError`` when zlib fails on the chunk or when no
    compression engine is registered for the header byte.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely be at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for "decompress compressed data
    # when encoded with common and officially supported compression engines"
    # case over "raw data" and "data encoded by less common or non-official
    # compression engines." That is why we have the inline lookup first
    # followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    header = data[0:1]

    if header == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as exc:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(exc)
            )

    # '\0' is more common than 'u' so it goes first.
    if header == b'\0':
        return data

    if header == b'u':
        # Skip the one-byte marker without copying the payload.
        return util.buffer(data, 1)

    try:
        decompressor = self._decompressors[header]
    except KeyError:
        # First chunk seen with this header: resolve the engine and keep
        # its decompressor around for the following chunks.
        try:
            engine = util.compengines.forrevlogheader(header)
            decompressor = engine.revlogcompressor(self._compengineopts)
            self._decompressors[header] = decompressor
        except KeyError:
            raise error.RevlogError(
                _(b'unknown compression type %s') % binascii.hexlify(header)
            )

    return decompressor.decompress(data)
2200
2198
def _addrevision(
    self,
    node,
    rawtext,
    transaction,
    link,
    p1,
    p2,
    flags,
    cachedelta,
    ifh,
    dfh,
    alwayscache=False,
    deltacomputer=None,
    sidedata=None,
):
    """Append one revision to the log and return its revision number.

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    ``ifh`` and ``dfh`` are the open index and data file handles. When
    ``alwayscache`` is set and no rawtext was supplied, the text is built
    from the delta so it can be put in the revision cache.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.
    """
    if node == self.nullid:
        raise error.RevlogError(
            _(b"%s: attempt to add null revision") % self.display_id
        )
    is_wdir = (
        node == self.nodeconstants.wdirid
        or node in self.nodeconstants.wdirfilenodeids
    )
    if is_wdir:
        raise error.RevlogError(
            _(b"%s: attempt to add wdir revision") % self.display_id
        )

    # Handle given to the delta computer: the index file for inline
    # revlogs, the data file otherwise.
    fh = ifh if self._inline else dfh

    btext = [rawtext]

    curr = len(self)
    offset = self._get_data_offset(curr - 1)

    checker = self._concurrencychecker
    if checker:
        index_bytes = curr * self.index.entry_size
        if self._inline:
            # offset is "as if" it were in the .d file, so we need to add on
            # the size of the entry metadata.
            checker(ifh, self._indexfile, offset + index_bytes)
        else:
            # Entries in the .i are a consistent size.
            checker(ifh, self._indexfile, index_bytes)
            checker(dfh, self._datafile, offset)

    p1r = self.rev(p1)
    p2r = self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is not None:
        textlen = len(rawtext)
    else:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(
            revlog.size(self, cachedelta[0]), cachedelta[1]
        )

    if deltacomputer is None:
        deltacomputer = deltautil.deltacomputer(self)

    revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

    deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

    if sidedata and self.hassidedata:
        serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
        # Sidedata is written right after the revision's delta.
        sidedata_offset = offset + deltainfo.deltalen
    else:
        serialized_sidedata = b""
        # Don't store the offset if the sidedata is empty, that way
        # we can easily detect empty sidedata and they will be no different
        # than ones we manually add.
        sidedata_offset = 0

    index_entry = (
        offset_type(offset, flags),
        deltainfo.deltalen,
        textlen,
        deltainfo.base,
        link,
        p1r,
        p2r,
        node,
        sidedata_offset,
        len(serialized_sidedata),
    )

    # The entry has to be in the index before its binary form is requested.
    self.index.append(index_entry)
    entry = self.index.entry_binary(curr)
    if curr == 0:
        # The very first entry is written together with the revlog header.
        header = self._format_flags | self._format_version
        header = self.index.pack_header(header)
        entry = header + entry
    self._writeentry(
        transaction,
        ifh,
        dfh,
        entry,
        deltainfo.data,
        link,
        offset,
        serialized_sidedata,
    )

    # Re-read the text from the shared one-element list handed to the
    # delta computer through revinfo.
    rawtext = btext[0]

    if alwayscache and rawtext is None:
        rawtext = deltacomputer.buildtext(revinfo, fh)

    if type(rawtext) == bytes:  # only accept immutable objects
        self._revisioncache = (node, curr, rawtext)
    self._chainbasecache[curr] = deltainfo.chainbase
    return curr
2338
2336
def _get_data_offset(self, prev):
    """Returns the current offset in the (in-transaction) data file.

    Versions < 2 of the revlog can get this O(1), revlog v2 needs a docket
    file to store that information: since sidedata can be rewritten to the
    end of the data file within a transaction, you can have cases where, for
    example, rev `n` does not have sidedata while rev `n - 1` does, leading
    to `n - 1`'s sidedata being written after `n`'s data.

    For revlog v2 the whole index is scanned and ``prev`` is not used.

    TODO cache this in a docket file before getting out of experimental."""
    if self._format_version != REVLOGV2:
        return self.end(prev)

    highest = 0
    for rev, entry in enumerate(self.index):
        # Index fields 8 and 9 hold the sidedata offset and length.
        sidedata_end = entry[8] + entry[9]
        # Sidedata for a previous rev has potentially been written after
        # this rev's end, so take the max.
        candidates = (self.end(rev), highest, sidedata_end)
        highest = max(candidates)
    return highest
2358
2356
def _writeentry(
    self, transaction, ifh, dfh, entry, data, link, offset, sidedata
):
    """Write one index entry and its revision data to the open handles.

    ``entry`` is the binary index entry, ``data`` a two-item sequence of
    (header, payload) and ``sidedata`` the serialized sidedata (possibly
    empty). The pre-write file sizes are registered with ``transaction``
    before anything is written. ``link`` is not used here.
    """
    # Files opened in a+ mode have inconsistent behavior on various
    # platforms. Windows requires that a file positioning call be made
    # when the file handle transitions between reads and writes. See
    # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
    # platforms, Python or the platform itself can be buggy. Some versions
    # of Solaris have been observed to not append at the end of the file
    # if the file was seeked to before the end. See issue4943 for more.
    #
    # We work around this issue by inserting a seek() before writing.
    # Note: This is likely not necessary on Python 3. However, because
    # the file handle is reused for reads and may be seeked there, we need
    # to be careful before changing this.
    ifh.seek(0, os.SEEK_END)
    if dfh:
        dfh.seek(0, os.SEEK_END)

    curr = len(self) - 1
    if self._inline:
        # Inline revlogs interleave entries and data in the index file, so
        # the rollback offset has to account for the preceding entries.
        offset += curr * self.index.entry_size
        transaction.add(self._indexfile, offset)
        ifh.write(entry)
        ifh.write(data[0])
        ifh.write(data[1])
        if sidedata:
            ifh.write(sidedata)
        self._enforceinlinesize(transaction, ifh)
    else:
        transaction.add(self._datafile, offset)
        transaction.add(self._indexfile, curr * len(entry))
        if data[0]:
            dfh.write(data[0])
        dfh.write(data[1])
        if sidedata:
            dfh.write(sidedata)
        ifh.write(entry)
    nodemaputil.setup_persistent_nodemap(transaction, self)
2398
2396
def addgroup(
    self,
    deltas,
    linkmapper,
    transaction,
    alwayscache=False,
    addrevisioncb=None,
    duplicaterevisioncb=None,
):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    Each item of ``deltas`` is an 8-tuple ``(node, p1, p2, linknode,
    deltabase, delta, flags, sidedata)``; ``linkmapper`` is called with the
    linknode to obtain the linkrev to store.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the revision number that was added.

    If ``duplicaterevisioncb`` is defined, it will be called with arguments
    of this revlog and the revision number of each node that was already
    stored.

    Returns True if at least one delta was processed (added or found to be
    a duplicate), False if ``deltas`` was empty.

    Raises ``error.ProgrammingError`` on nested calls,
    ``error.LookupError`` for an unknown parent or delta base, and
    ``error.CensoredBaseError`` when a delta against a censored base is
    not a full replacement.
    """

    if self._writinghandles:
        raise error.ProgrammingError(b'cannot nest addgroup() calls')

    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self._indexfp(b"a+")
    dfh = None
    empty = True

    # Everything after the index handle is opened runs under the ``try`` so
    # the handles are closed even when the transaction registration or the
    # opening of the data file fails.
    try:
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self._indexfile, end + isize)
        else:
            transaction.add(self._indexfile, isize)
            transaction.add(self._datafile, end)
            dfh = self._datafp(b"a+")

        self._writinghandles = (ifh, dfh)

        deltacomputer = deltautil.deltacomputer(self)
        # loop through our set of deltas
        for data in deltas:
            node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
            link = linkmapper(linknode)
            flags = flags or REVIDX_DEFAULT_FLAGS

            rev = self.index.get_rev(node)
            if rev is not None:
                # this can happen if two branches make the same change
                self._nodeduplicatecallback(transaction, rev)
                if duplicaterevisioncb:
                    duplicaterevisioncb(self, rev)
                empty = False
                continue

            for p in (p1, p2):
                if not self.index.has_node(p):
                    raise error.LookupError(
                        p, self.radix, _(b'unknown parent')
                    )

            if not self.index.has_node(deltabase):
                raise error.LookupError(
                    deltabase, self.display_id, _(b'unknown delta base')
                )

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # if base is censored, delta must be full replacement in a
                # single patch operation
                hlen = struct.calcsize(b">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(
                        self.display_id, self.node(baserev)
                    )

            if not flags and self._peek_iscensored(baserev, delta):
                flags |= REVIDX_ISCENSORED

            # We assume consumers of addrevisioncb will want to retrieve
            # the added revision, which will require a call to
            # revision(). revision() will fast path if there is a cache
            # hit. So, we tell _addrevision() to always cache in this case.
            # We're only using addgroup() in the context of changegroup
            # generation so the revision data can always be handled as raw
            # by the flagprocessor.
            rev = self._addrevision(
                node,
                None,
                transaction,
                link,
                p1,
                p2,
                flags,
                (baserev, delta),
                ifh,
                dfh,
                alwayscache=alwayscache,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

            if addrevisioncb:
                addrevisioncb(self, rev)
            empty = False

            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self._datafp(b"a+")
                ifh = self._indexfp(b"a+")
                self._writinghandles = (ifh, dfh)
    finally:
        self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
    return not empty
2524
2522
def iscensored(self, rev):
    """Tell whether revision ``rev`` carries the censored flag.

    Always False for revlogs that are not censorable; otherwise the result
    is the revision's flags masked with REVIDX_ISCENSORED.
    """
    if self._censorable:
        return self.flags(rev) & REVIDX_ISCENSORED

    return False
2531
2529
2532 def _peek_iscensored(self, baserev, delta):
2530 def _peek_iscensored(self, baserev, delta):
2533 """Quickly check if a delta produces a censored revision."""
2531 """Quickly check if a delta produces a censored revision."""
2534 if not self._censorable:
2532 if not self._censorable:
2535 return False
2533 return False
2536
2534
2537 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2535 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2538
2536
def getstrippoint(self, minlink):
    """Compute where this revlog must be cut to strip linkrev ``minlink``.

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    tiprev = len(self) - 1
    heads = self.headrevs()
    return storageutil.resolvestripinfo(
        minlink, tiprev, heads, self.linkrev, self.parentrevs
    )
2552
2550
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.

    Nothing happens when the revlog is empty or when the strip point is
    past the last revision.
    """
    if len(self) == 0:
        return

    rev, _brokenrevs = self.getstrippoint(minlink)
    if rev == len(self):
        return

    # first truncate the files on disk
    data_end = self.start(rev)
    if self._inline:
        # Entries and data share the index file: cut after the data and
        # the index entries of the revisions that are kept.
        index_end = data_end + rev * self.index.entry_size
    else:
        transaction.add(self._datafile, data_end)
        index_end = rev * self.index.entry_size

    transaction.add(self._indexfile, index_end)

    # then reset internal state in memory to forget those revisions
    self._revisioncache = None
    self._chaininfocache = util.lrucachedict(500)
    self._chunkclear()

    del self.index[rev:-1]
2590
2588
def checksize(self):
    """Check size of index and data files

    return a (dd, di) tuple.
    - dd: extra bytes for the "data" file
    - di: extra bytes for the "index" file

    A healthy revlog will return (0, 0).

    A missing file (``ENOENT``) counts as zero extra bytes; any other
    ``IOError`` is re-raised.
    """
    # Expected data size is the end offset of the last revision.
    expected = 0
    if len(self):
        expected = max(0, self.end(len(self) - 1))

    try:
        with self._datafp() as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        dd = actual - expected
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        dd = 0

    try:
        # Use a context manager so the handle is closed even when
        # seek()/tell() raise.
        with self.opener(self._indexfile) as f:
            f.seek(0, io.SEEK_END)
            actual = f.tell()
        s = self.index.entry_size
        # Bytes left over after the last complete index entry.
        i = max(0, actual // s)
        di = actual - (i * s)
        if self._inline:
            # Inline revlogs keep revision data in the index file, so the
            # expected size is the index entries plus every data chunk and
            # there is no separate data file to report on.
            databytes = 0
            for r in self:
                databytes += max(0, self.length(r))
            dd = 0
            di = actual - len(self) * s - databytes
    except IOError as inst:
        if inst.errno != errno.ENOENT:
            raise
        di = 0

    return (dd, di)
2634
2632
def files(self):
    """Return the list of files backing this revlog.

    The index file is always listed; the data file is added only when the
    revlog is not inline.
    """
    res = [self._indexfile]
    if not self._inline:
        res.append(self._datafile)
    return res
2640
2638
def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
):
    """Emit revision deltas for ``nodes`` via ``storageutil.emitrevisions``.

    ``nodesorder`` must be one of ``b'nodes'``, ``b'storage'``,
    ``b'linear'`` or ``None``; any other value raises
    ``error.ProgrammingError``.

    The remaining arguments are forwarded unchanged, except that
    ``nodesorder`` and ``deltamode`` may be adjusted to this revlog's
    configuration (see the comments below).
    """
    if nodesorder not in (b'nodes', b'storage', b'linear', None):
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    # Without generaldelta, an unspecified order falls back to storage
    # order.
    if nodesorder is None and not self._generaldelta:
        nodesorder = b'storage'

    # When this revlog does not store delta chains, every mode except
    # CG_DELTAMODE_PREV is replaced by CG_DELTAMODE_FULL.
    if (
        not self._storedeltachains
        and deltamode != repository.CG_DELTAMODE_PREV
    ):
        deltamode = repository.CG_DELTAMODE_FULL

    return storageutil.emitrevisions(
        self,
        nodes,
        nodesorder,
        revlogrevisiondelta,
        deltaparentfn=self.deltaparent,
        candeltafn=self.candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
        deltamode=deltamode,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions,
        sidedata_helpers=sidedata_helpers,
    )
2679
2677
# Delta reuse policies accepted by ``clone`` (see its docstring).
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'

DELTAREUSEFULLADD = b'fulladd'

DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument controls how deltas from the existing
    revlog are preserved in the destination revlog. The argument can have
    the following values:

    DELTAREUSEALWAYS
       Deltas will always be reused (if possible), even if the destination
       revlog would not select the same revisions for the delta. This is the
       fastest mode of operation.
    DELTAREUSESAMEREVS
       Deltas will be reused if the destination revlog would pick the same
       revisions for the delta. This mode strikes a balance between speed
       and optimization.
    DELTAREUSENEVER
       Deltas will never be reused. This is the slowest mode of execution.
       This mode can be used to recompute deltas (e.g. if the diff/delta
       algorithm changes).
    DELTAREUSEFULLADD
       Revisions will be re-added as if they were new content. This is
       slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
       e.g. large file detection and handling.

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``forcedeltabothparents``
    argument controls whether to force compute deltas against both parents
    for merges. By default, the current default is used.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    Raises ``ValueError`` when ``deltareuse`` is not a known policy, when
    the destination is not empty, or when either revlog has filtered
    revisions.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(
            _(b'value for deltareuse invalid: %s') % deltareuse
        )

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # lazydelta and lazydeltabase controls whether to reuse a cached delta,
    # if possible.
    oldlazydelta = destrevlog._lazydelta
    oldlazydeltabase = destrevlog._lazydeltabase
    oldamd = destrevlog._deltabothparents

    try:
        # DELTAREUSEFULLADD leaves the destination's lazy-delta settings
        # as they are.
        if deltareuse == self.DELTAREUSEALWAYS:
            destrevlog._lazydeltabase = True
            destrevlog._lazydelta = True
        elif deltareuse == self.DELTAREUSESAMEREVS:
            destrevlog._lazydeltabase = False
            destrevlog._lazydelta = True
        elif deltareuse == self.DELTAREUSENEVER:
            destrevlog._lazydeltabase = False
            destrevlog._lazydelta = False

        # A falsy ``forcedeltabothparents`` keeps the destination's
        # current setting.
        destrevlog._deltabothparents = forcedeltabothparents or oldamd

        self._clone(
            tr,
            destrevlog,
            addrevisioncb,
            deltareuse,
            forcedeltabothparents,
            sidedata_helpers,
        )

    finally:
        # Always restore the destination's original configuration.
        destrevlog._lazydelta = oldlazydelta
        destrevlog._lazydeltabase = oldlazydeltabase
        destrevlog._deltabothparents = oldamd
2786
2784
def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """perform the core duty of `revlog.clone` after parameter processing

    Every revision of this revlog is added to ``destrevlog`` in order.
    ``addrevisioncb``, when set, is called as
    ``addrevisioncb(self, rev, node)`` after each revision is added.
    """
    deltacomputer = deltautil.deltacomputer(destrevlog)
    index = self.index
    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # (Possibly) reuse the delta from the revlog if allowed and
        # the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        if deltareuse == self.DELTAREUSEFULLADD:
            # Full re-add: go through the public addrevision() entry point.
            text, sidedata = self._revisiondata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                # NOTE(review): ``&`` binds tighter than ``|``, so this is
                # ``flags | (new_flags[0] & ~new_flags[1])``. If new_flags
                # is (flags to add, flags to remove), flags already set on
                # the entry are never cleared -- confirm the intent.
                flags = flags | new_flags[0] & ~new_flags[1]

            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            if destrevlog._lazydelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            # The raw text is only read when no stored delta is reused.
            sidedata = None
            if not cachedelta:
                rawtext, sidedata = self._revisiondata(rev)
            if sidedata is None:
                sidedata = self.sidedata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                # NOTE(review): same precedence question as in the
                # DELTAREUSEFULLADD branch above.
                flags = flags | new_flags[0] & ~new_flags[1]

            ifh = destrevlog.opener(
                destrevlog._indexfile, b'a+', checkambig=False
            )
            try:
                # Opened inside the outer try so the index handle is
                # closed even if opening the data file fails.
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog._datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )
                finally:
                    if dfh:
                        dfh.close()
            finally:
                ifh.close()

        if addrevisioncb:
            addrevisioncb(self, rev, node)
2881
2879
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the content of ``censornode`` with a censor tombstone.

    A temporary revlog (postfix ``tmpcensored``) is filled with every
    revision of this one, the censored revision being stored as the packed
    tombstone flagged ``REVIDX_ISCENSORED``. The temporary files are then
    renamed over the current ones and the index is reloaded.

    Raises ``error.RevlogError`` for version 0 revlogs and ``error.Abort``
    when the tombstone is longer than the censored data or when a censored
    revision is stored as a delta.
    """
    if self._format_version == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs')
            % self._format_version
        )

    censorrev = self.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    if len(tombstone) > self.rawsize(censorrev):
        raise error.Abort(
            _(b'censor tombstone must be no longer than censored data')
        )

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog(
        self.opener,
        target=self.target,
        radix=self.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    # Give the temporary revlog the same on-disk format as this one.
    newrl._format_version = self._format_version
    newrl._format_flags = self._format_flags
    newrl._generaldelta = self._generaldelta
    newrl._parse_index = self._parse_index

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev == censorrev:
            newrl.addrawrevision(
                tombstone,
                tr,
                self.linkrev(censorrev),
                p1,
                p2,
                censornode,
                REVIDX_ISCENSORED,
            )

            # The tombstone must have been stored as a full snapshot.
            if newrl.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(
                        b'censored revision stored as delta; '
                        b'cannot censor'
                    ),
                    hint=_(
                        b'censoring of revlogs is not '
                        b'fully implemented; please report '
                        b'this bug'
                    ),
                )
            continue

        if self.iscensored(rev):
            # Already-censored revisions are copied from their stored
            # chunk, which is only possible when it is not a delta.
            if self.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(
                        b'cannot censor due to censored '
                        b'revision having delta stored'
                    )
                )
            rawtext = self._chunk(rev)
        else:
            rawtext = self.rawdata(rev)

        newrl.addrawrevision(
            rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
        )

    # Back up the current files in the transaction before replacing them.
    tr.addbackup(self._indexfile, location=b'store')
    if not self._inline:
        tr.addbackup(self._datafile, location=b'store')

    self.opener.rename(newrl._indexfile, self._indexfile)
    if not self._inline:
        self.opener.rename(newrl._datafile, self._datafile)

    # Drop cached state and reload the index from the replaced files.
    self.clearcaches()
    self._loadindex()
2969
2967
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.

    ``state`` is a dict shared with the caller. This method reads its
    ``expectedversion``, ``erroroncensored`` and optional ``skipflags``
    keys, and resets ``skipread`` and ``safe_renamed`` to empty sets
    before checking revisions.
    """
    dd, di = self.checksize()
    if dd:
        yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
    if di:
        yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

    version = self._format_version

    # The verifier tells us what version revlog we should be.
    if version != state[b'expectedversion']:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.display_id, version, state[b'expectedversion'])
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #   flags()             | 0      | 0      | 0     | not 0
        #   renamed()           | False  | True   | False | ?
        #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        #              | common | rename | meta  | ext
        # -------------------------------------------------
        #    rawsize() | L1     | L1     | L1    | L1
        #       size() | L1     | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2     | L2     | L2    | L2
        #    len(text) | L2     | L2     | L2    | L3
        #  len(read()) | L2     | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            # Keep only the requested skip flags that this revision
            # actually carries.
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                skipflags &= self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            l1 = self.rawsize(rev)
            l2 = len(self.rawdata(node))

            if l1 != l2:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                    node=node,
                )

        except error.CensoredNodeError:
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
                state[b'skipread'].add(node)
        except Exception as e:
            # Any other failure is reported as a problem instead of
            # aborting verification of the remaining revisions.
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(e)),
                node=node,
            )
            state[b'skipread'].add(node)
3074
3072
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Return a dict of storage facts, one bytes key per requested flag.

    - ``exclusivefiles``: list of ``(opener, path)`` pairs for the index
      file and, when the revlog is not inline, the data file
    - ``sharedfiles``: always an empty list
    - ``revisionscount``: ``len(self)``
    - ``trackedsize``: sum of ``rawsize`` over all revisions
    - ``storedsize``: sum of the on-disk sizes of ``self.files()``

    Keys whose flag is false are left out of the result.
    """
    d = {}

    if exclusivefiles:
        d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
        if not self._inline:
            d[b'exclusivefiles'].append((self.opener, self._datafile))

    if sharedfiles:
        d[b'sharedfiles'] = []

    if revisionscount:
        d[b'revisionscount'] = len(self)

    if trackedsize:
        d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

    if storedsize:
        d[b'storedsize'] = sum(
            self.opener.stat(path).st_size for path in self.files()
        )

    return d
3105
3103
def rewrite_sidedata(self, helpers, startrev, endrev):
    """Generate sidedata for revisions ``startrev`` to ``endrev`` inclusive.

    The sidedata produced by ``helpers`` is appended to the data file and
    the matching index entries are then rewritten in place so that they
    point at the new sidedata and carry the updated flags.

    ``helpers`` is the sidedata helpers structure handed over to
    ``sidedatautil.run_sidedata_helpers``; when both ``helpers[1]`` and
    ``helpers[2]`` are empty there is nothing to generate or remove and the
    method returns without touching anything. It also returns immediately
    when the revlog has no sidedata support.

    Raises ``error.ProgrammingError`` when called on a delayed (and not
    diverted) revlog, and ``error.Abort`` when any revision in the range
    already has sidedata.
    """
    if not self.hassidedata:
        return
    # inline are not yet supported because they suffer from an issue when
    # rewriting them (since it's not an append-only operation).
    # See issue6485.
    assert not self._inline
    if not helpers[1] and not helpers[2]:
        # Nothing to generate or remove
        return

    # changelog implements some "delayed" writing mechanism that assumes
    # that all index data is written in append mode and is therefore
    # incompatible with the seeked write done in this method. The use of
    # such "delayed" writing will soon be removed for revlog versions that
    # support side data, so for now, we only keep this simple check to
    # highlight the situation.
    delayed = getattr(self, '_delayed', False)
    diverted = getattr(self, '_divert', False)
    if delayed and not diverted:
        msg = "cannot rewrite_sidedata of a delayed revlog"
        raise error.ProgrammingError(msg)

    revs = range(startrev, endrev + 1)

    # Rewriting entries that already have sidedata is not supported yet,
    # because it introduces garbage data in the revlog. Check the whole
    # range before writing anything, so that an abort never leaves freshly
    # appended (and unreferenced) sidedata behind in the data file.
    for rev in revs:
        entry = self.index[rev]
        # entry[8] / entry[9] are the sidedata offset and length
        if entry[8] != 0 or entry[9] != 0:
            msg = b"Rewriting existing sidedata is not supported yet"
            raise error.Abort(msg)

    new_entries = []
    # append the new sidedata
    with self._datafp(b'a+') as fp:
        # Maybe this bug still exists, see revlog._writeentry
        fp.seek(0, os.SEEK_END)
        current_offset = fp.tell()
        for rev in revs:
            entry = self.index[rev]
            new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                store=self,
                sidedata_helpers=helpers,
                sidedata={},
                rev=rev,
            )

            serialized_sidedata = sidedatautil.serialize_sidedata(
                new_sidedata
            )
            sidedata_size = len(serialized_sidedata)

            # Apply (potential) flags to add and to remove after running
            # the sidedata helpers. The parentheses matter: `&` binds
            # tighter than `|`, so without them the removal mask would only
            # be applied to the flags being added, never to the flags
            # already present on the entry.
            new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
            entry = (new_offset_flags,) + entry[1:8]
            entry += (current_offset, sidedata_size)

            fp.write(serialized_sidedata)
            new_entries.append(entry)
            current_offset += sidedata_size

    # rewrite the new index entries
    with self._indexfp(b'r+') as fp:
        fp.seek(startrev * self.index.entry_size)
        for rev, e in enumerate(new_entries, startrev):
            self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
            packed = self.index.entry_binary(rev)
            if rev == 0:
                # the first entry is stored together with the revlog header
                header = self._format_flags | self._format_version
                header = self.index.pack_header(header)
                packed = header + packed
            fp.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now