revlog: fix capitalisation of an error...
marmoute - r48002:e51392ac default
@@ -1,3220 +1,3220 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
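# Example: offset_type(1024, REVIDX_ISCENSORED) stores the byte offset in the
# upper 48 bits and the flag in the low 16 bits of a single index field; the
# start() and flags() accessors on the revlog class below undo this packing
# with `>> 16` and `& 0xFFFF` respectively.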


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)

# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
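        # `x & (x - 1)` clears the lowest set bit, so the expression below is
        # zero exactly when the cache size is a power of two (e.g. 65536
        # passes, 65000 does not).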
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold):
        """return a file's content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        return util.buffer(util.mmapread(fp))
                return fp.read()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is None:
            entry_point = b'%s.i' % self.radix
        else:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

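        # The 4-byte header packs a 16-bit version number into the low bits
        # and 16 feature-flag bits (e.g. FLAG_INLINE_DATA, FLAG_GENERALDELTA)
        # into the high bits; they are split apart below.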
        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        if self._format_version == REVLOGV0:
            if self._format_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            self._inline = False
            self._generaldelta = False

        elif self._format_version == REVLOGV1:
            if self._format_flags & ~REVLOGV1_FLAGS:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            self._inline = self._format_flags & FLAG_INLINE_DATA
            self._generaldelta = self._format_flags & FLAG_GENERALDELTA

        elif self._format_version == REVLOGV2:
            if self._format_flags & ~REVLOGV2_FLAGS:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            # There is a bug in the transaction handling when going from an
            # inline revlog to a separate index and data file. Turn it off until
            # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
            # See issue6485
            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # revlog-v2 has built in sidedata support
            self.hassidedata = True

        else:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)

        index_data = entry_data
        self._indexfile = entry_point

        if self.postfix is None or self.postfix == b'a':
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            f.seek(0, os.SEEK_END)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
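    # start() and flags() below therefore invert offset_type() above: the
    # shift discards the flags, the mask discards the offset.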
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
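        # If p1 is null but p2 is not, return the real parent first so a
        # null parent never precedes a non-null one.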
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

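    # Usage sketch, assuming ``rl`` is an open revlog and ``revs`` a list of
    # valid revision numbers (hypothetical names): the returned lazyancestors
    # object is both iterable and usable for membership tests.
    #
    #     ancs = rl.ancestors(revs, stoprev=0, inclusive=True)
    #     if somerev in ancs:    # lazy membership test
    #         ...
    #     for r in ancs:         # revs and their ancestors
    #         ...
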
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

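    # Sketch of the contract, with hypothetical nodes ``c`` and ``h``:
    #
    #     has, missing = rl.findcommonmissing(common=[c], heads=[h])
    #     # ``has`` answers membership tests for ::c lazily, while
    #     # ``missing`` is the sorted node list of (::h) - (::c).
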
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

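    # The find* helpers all compute the revset (::heads) - (::common); a
    # sketch with hypothetical, corresponding rev/node inputs:
    #
    #     revs = rl.findmissingrevs(common=[crev], heads=[hrev])
    #     nodes = rl.findmissing(common=[cnode], heads=[hnode])
    #     assert nodes == [rl.node(r) for r in revs]
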
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the set of descendants with the
                # roots up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

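    # Sketch of the return contract, with hypothetical nodes ``r`` and ``h``:
    #
    #     nodes, outroots, outheads = rl.nodesbetween([r], [h])
    #     # ``nodes`` is the topologically sorted closure r::h;
    #     # ``outroots``/``outheads`` are the inputs that survived into it.
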
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

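    # The fallback above makes a single linear pass: every rev is
    # optimistically marked as a head, then unmarked as soon as it shows up
    # as a parent. The list has ``count + 1`` slots so that nullrev (-1)
    # parents harmlessly hit the extra trailing slot. On an assumed
    # three-rev chain 0 <- 1 <- 2:
    #
    #     ishead = [1, 1, 1, 0] after marking r0, r1, r2; visiting r1
    #     clears slot 0 and visiting r2 clears slot 1, so only rev 2 is
    #     reported as a head.
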
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

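    # Usage sketch with hypothetical nodes: list only the heads descending
    # from ``startnode``, pretending everything in ``stop`` is childless:
    #
    #     hs = rl.heads(start=startnode, stop=[stopnode])
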
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

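    # ``isancestorrev`` above is a reachability query in disguise; with
    # hypothetical revs ``a < b``:
    #
    #     rl.reachableroots(a, [b], [a], includepath=False)
    #     # non-empty iff a is reachable walking back from b, i.e. iff a is
    #     # an ancestor of b; includepath=True returns all of a::b instead.
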
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids here, as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

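    # Usage sketch (hypothetical values): ``lookup`` accepts several
    # identifier shapes and funnels them through _match/_partialmatch:
    #
    #     rl.lookup(5)            # revision number
    #     rl.lookup(b'5')         # stringified revision number
    #     rl.lookup(node)         # full binary node
    #     rl.lookup(b'1f0e5c')    # unambiguous hex prefix
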
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

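    # Usage sketch (hypothetical node): the shortest hex prefix that still
    # resolves uniquely, padded to at least four characters:
    #
    #     prefix = rl.shortest(node, minlength=4)
    #     assert rl.lookup(prefix) == node
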
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

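    # The window arithmetic above aligns reads on ``cachesize`` boundaries
    # (a power of two). Worked numbers, assuming cachesize = 65536,
    # offset = 70000 and length = 1000:
    #
    #     realoffset = 70000 & ~65535                             # -> 65536
    #     reallength = ((70000 + 1000 + 65536) & ~65535) - 65536  # -> 65536
    #     # one aligned 64KiB read covers bytes 65536..131071; the caller
    #     # gets the requested [70000, 71000) slice back as a buffer.
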
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

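    # For inline revlogs, index entries and data chunks are interleaved in a
    # single file: the data of rev N sits behind the N+1 index entries for
    # revs 0..N, which is what the ``_inline`` branch above corrects for.
    # Sketch, assuming the 64-byte v1 entry size:
    #
    #     physical_start = logical_start + (startrev + 1) * 64
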
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

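    # Sketch of the snapshot rules encoded above (sparse-revlog case): a
    # delta based on one of its parents is a regular delta; a revision based
    # on itself or on nullrev is a full snapshot; a delta over a non-parent
    # base is a snapshot only if that base is itself one.
    #
    #     rl.issnapshot(rev)       # full or intermediate snapshot?
    #     rl.snapshotdepth(rev)    # snapshots below it in the delta chain
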
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

1817 def sidedata(self, nodeorrev, _df=None):
1817 def sidedata(self, nodeorrev, _df=None):
1818 """a map of extra data related to the changeset but not part of the hash
1818 """a map of extra data related to the changeset but not part of the hash
1819
1819
1820 This function currently return a dictionary. However, more advanced
1820 This function currently return a dictionary. However, more advanced
1821 mapping object will likely be used in the future for a more
1821 mapping object will likely be used in the future for a more
1822 efficient/lazy code.
1822 efficient/lazy code.
1823 """
1823 """
1824 return self._revisiondata(nodeorrev, _df)[1]
1824 return self._revisiondata(nodeorrev, _df)[1]
1825
1825
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (these usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

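    # Conceptual sketch (for exposition only): the mdiff.patches() call above
    # is equivalent to applying each delta of the chain in order on top of
    # the base text:
    #
    #   text = basetext                # full text of the chain base
    #   for delta in bins:             # each stored delta, in chain order
    #       text = mdiff.patches(text, [delta])
    #
    # mdiff.patches(basetext, bins) just does this in a single call.
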
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

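    # Sketch of the default node computation (for exposition only; the
    # canonical implementation is storageutil.hashrevisionsha1):
    #
    #   import hashlib
    #   def _hash_sketch(text, p1, p2):
    #       a, b = sorted([p1, p2])  # parent hashes in sorted order
    #       return hashlib.sha1(a + b + text).digest()
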
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    dfh.seek(0, os.SEEK_END)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

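    # Illustrative note (not in the original source): compress() returns a
    # (header, data) pair whose concatenation is what ends up stored, e.g.:
    #
    #   (b'', b'x\x9c...')    # zlib chunk; the engine already added b'x'
    #   (b'u', b'some text')  # stored uncompressed, b'u' marks raw text
    #   (b'', b'\0...')       # data starting with NUL needs no marker
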
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

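    # Illustrative roundtrip sketch (``rl`` is a hypothetical revlog):
    #
    #   header, packed = rl.compress(b'some chunk of revision data')
    #   chunk = header + packed          # this is what gets stored on disk
    #   assert bytes(rl.decompress(chunk)) == b'some chunk of revision data'
    #
    # decompress() dispatches on the first byte: b'x' (zlib), b'\0' (raw
    # data), b'u' (uncompressed marker), else a registered engine's revlog
    # header.
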
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way
            # empty sidedata is easy to detect, and it is no different
            # from sidedata we add manually.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

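    # Worked example (illustrative): suppose rev 4 ends at data offset 1000,
    # but rev 3's sidedata was rewritten afterwards and now ends at 1200.
    # Appending rev 5 at self.end(4) == 1000 would overwrite that sidedata,
    # so the loop above picks max(1000, 1200) == 1200 as the write offset.
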
    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
                dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

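    # Illustrative usage (``rl`` is a hypothetical revlog instance):
    #
    #   dd, di = rl.checksize()
    #   if (dd, di) != (0, 0):
    #       pass  # trailing bytes detected in the data/index file
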
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

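    # Illustrative sketch of how these policies are typically passed to
    # clone() (``src``, ``dst`` and ``tr`` are hypothetical objects):
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
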
2744 def clone(
2744 def clone(
2745 self,
2745 self,
2746 tr,
2746 tr,
2747 destrevlog,
2747 destrevlog,
2748 addrevisioncb=None,
2748 addrevisioncb=None,
2749 deltareuse=DELTAREUSESAMEREVS,
2749 deltareuse=DELTAREUSESAMEREVS,
2750 forcedeltabothparents=None,
2750 forcedeltabothparents=None,
2751 sidedata_helpers=None,
2751 sidedata_helpers=None,
2752 ):
2752 ):
2753 """Copy this revlog to another, possibly with format changes.
2753 """Copy this revlog to another, possibly with format changes.
2754
2754
2755 The destination revlog will contain the same revisions and nodes.
2755 The destination revlog will contain the same revisions and nodes.
2756 However, it may not be bit-for-bit identical due to e.g. delta encoding
2756 However, it may not be bit-for-bit identical due to e.g. delta encoding
2757 differences.
2757 differences.
2758
2758
2759 The ``deltareuse`` argument control how deltas from the existing revlog
2759 The ``deltareuse`` argument control how deltas from the existing revlog
2760 are preserved in the destination revlog. The argument can have the
2760 are preserved in the destination revlog. The argument can have the
2761 following values:
2761 following values:
2762
2762
2763 DELTAREUSEALWAYS
2763 DELTAREUSEALWAYS
2764 Deltas will always be reused (if possible), even if the destination
2764 Deltas will always be reused (if possible), even if the destination
2765 revlog would not select the same revisions for the delta. This is the
2765 revlog would not select the same revisions for the delta. This is the
2766 fastest mode of operation.
2766 fastest mode of operation.
2767 DELTAREUSESAMEREVS
2767 DELTAREUSESAMEREVS
2768 Deltas will be reused if the destination revlog would pick the same
2768 Deltas will be reused if the destination revlog would pick the same
2769 revisions for the delta. This mode strikes a balance between speed
2769 revisions for the delta. This mode strikes a balance between speed
2770 and optimization.
2770 and optimization.
2771 DELTAREUSENEVER
2771 DELTAREUSENEVER
2772 Deltas will never be reused. This is the slowest mode of execution.
2772 Deltas will never be reused. This is the slowest mode of execution.
2773 This mode can be used to recompute deltas (e.g. if the diff/delta
2773 This mode can be used to recompute deltas (e.g. if the diff/delta
2774 algorithm changes).
2774 algorithm changes).
2775 DELTAREUSEFULLADD
2775 DELTAREUSEFULLADD
2776 Revision will be re-added as if their were new content. This is
2776 Revision will be re-added as if their were new content. This is
2777 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2777 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2778 eg: large file detection and handling.
2778 eg: large file detection and handling.
2779
2779
2780 Delta computation can be slow, so the choice of delta reuse policy can
2780 Delta computation can be slow, so the choice of delta reuse policy can
2781 significantly affect run time.
2781 significantly affect run time.
2782
2782
2783 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2783 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2784 two extremes. Deltas will be reused if they are appropriate. But if the
2784 two extremes. Deltas will be reused if they are appropriate. But if the
2785 delta could choose a better revision, it will do so. This means if you
2785 delta could choose a better revision, it will do so. This means if you
2786 are converting a non-generaldelta revlog to a generaldelta revlog,
2786 are converting a non-generaldelta revlog to a generaldelta revlog,
2787 deltas will be recomputed if the delta's parent isn't a parent of the
2787 deltas will be recomputed if the delta's parent isn't a parent of the
2788 revision.
2788 revision.
2789
2789
2790 In addition to the delta policy, the ``forcedeltabothparents``
2790 In addition to the delta policy, the ``forcedeltabothparents``
2791 argument controls whether to force compute deltas against both parents
2791 argument controls whether to force compute deltas against both parents
2792 for merges. By default, the current default is used.
2792 for merges. By default, the current default is used.
2793
2793
2794 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2794 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2795 `sidedata_helpers`.
2795 `sidedata_helpers`.
2796 """
2796 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
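            # Index entry layout, as unpacked below: entry[0] packs offset
            # and flags, entry[4] is the linkrev, entry[5]/entry[6] are the
            # parent revisions, and entry[7] is the node.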
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
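                    # '&' binds tighter than '|': the helper's "remove"
                    # flags (new_flags[1]) are masked out of its "add" flags
                    # (new_flags[0]) before being OR-ed into `flags`.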
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring
        # is to create a new revlog, copy all revisions to it, then replace
        # the revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
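        # checksize() reports, in bytes, how far the data file (dd) and the
        # index file (di) deviate from the sizes implied by the index
        # entries; the messages below spell that out.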
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

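    # A minimal usage sketch (hypothetical caller; `rl` is assumed to be an
    # existing revlog instance):
    #
    #     info = rl.storageinfo(trackedsize=True, storedsize=True)
    #     # info[b'trackedsize'] sums rawsize() over all revisions;
    #     # info[b'storedsize'] sums the on-disk size of the revlog files.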
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only
        # operation). See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that
        # assumes all index data is written in append mode and is therefore
        # incompatible with the seeked writes done in this method. The use
        # of such "delayed" writing will soon be removed for revlog versions
        # that support side data, so for now, we only keep this simple
        # assert to highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            dfh.seek(0, os.SEEK_END)
            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
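                # entry[8] and entry[9] are the existing sidedata offset and
                # length (the same slots replace_sidedata_info() updates
                # below).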
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in
                    # the revlog.
-                   msg = b"Rewriting existing sidedata is not supported yet"
+                   msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

            # rewrite the new index entries
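            # index entries are fixed-size, so a revision's entry lives at
            # byte offset rev * entry_size; seek there and rewrite in place.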
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)