revlog: use "entry_point" phrasing for loading the revlog...
marmoute
r47946:21ef5f75 default
@@ -1,3180 +1,3179 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
58 )
59 from .thirdparty import attr
59 from .thirdparty import attr
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 error,
63 error,
64 mdiff,
64 mdiff,
65 policy,
65 policy,
66 pycompat,
66 pycompat,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .interfaces import (
70 from .interfaces import (
71 repository,
71 repository,
72 util as interfaceutil,
72 util as interfaceutil,
73 )
73 )
74 from .revlogutils import (
74 from .revlogutils import (
75 deltas as deltautil,
75 deltas as deltautil,
76 flagutil,
76 flagutil,
77 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanket usage of all the names to prevent pyflakes complaints
86 # blanket usage of all the names to prevent pyflakes complaints
87 # We need these names available in the module for extensions.
87 # We need these names available in the module for extensions.
88
88
89 REVLOGV0
89 REVLOGV0
90 REVLOGV1
90 REVLOGV1
91 REVLOGV2
91 REVLOGV2
92 FLAG_INLINE_DATA
92 FLAG_INLINE_DATA
93 FLAG_GENERALDELTA
93 FLAG_GENERALDELTA
94 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FLAGS
95 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_FORMAT
96 REVLOG_DEFAULT_VERSION
96 REVLOG_DEFAULT_VERSION
97 REVLOGV1_FLAGS
97 REVLOGV1_FLAGS
98 REVLOGV2_FLAGS
98 REVLOGV2_FLAGS
99 REVIDX_ISCENSORED
99 REVIDX_ISCENSORED
100 REVIDX_ELLIPSIS
100 REVIDX_ELLIPSIS
101 REVIDX_HASCOPIESINFO
101 REVIDX_HASCOPIESINFO
102 REVIDX_EXTSTORED
102 REVIDX_EXTSTORED
103 REVIDX_DEFAULT_FLAGS
103 REVIDX_DEFAULT_FLAGS
104 REVIDX_FLAGS_ORDER
104 REVIDX_FLAGS_ORDER
105 REVIDX_RAWTEXT_CHANGING_FLAGS
105 REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
107 parsers = policy.importmod('parsers')
107 parsers = policy.importmod('parsers')
108 rustancestor = policy.importrust('ancestor')
108 rustancestor = policy.importrust('ancestor')
109 rustdagop = policy.importrust('dagop')
109 rustdagop = policy.importrust('dagop')
110 rustrevlog = policy.importrust('revlog')
110 rustrevlog = policy.importrust('revlog')
111
111
112 # Aliased for performance.
112 # Aliased for performance.
113 _zlibdecompress = zlib.decompress
113 _zlibdecompress = zlib.decompress
114
114
115 # max size of revlog with inline data
115 # max size of revlog with inline data
116 _maxinline = 131072
116 _maxinline = 131072
117 _chunksize = 1048576
117 _chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
120 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
121 return text, False
121 return text, False
122
122
123
123
124 def ellipsiswriteprocessor(rl, text):
124 def ellipsiswriteprocessor(rl, text):
125 return text, False
125 return text, False
126
126
127
127
128 def ellipsisrawprocessor(rl, text):
128 def ellipsisrawprocessor(rl, text):
129 return False
129 return False
130
130
131
131
132 ellipsisprocessor = (
132 ellipsisprocessor = (
133 ellipsisreadprocessor,
133 ellipsisreadprocessor,
134 ellipsiswriteprocessor,
134 ellipsiswriteprocessor,
135 ellipsisrawprocessor,
135 ellipsisrawprocessor,
136 )
136 )
137
137
138
138
139 def offset_type(offset, type):
139 def offset_type(offset, type):
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 raise ValueError(b'unknown revlog index flags')
141 raise ValueError(b'unknown revlog index flags')
142 return int(int(offset) << 16 | type)
142 return int(int(offset) << 16 | type)
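The packed value above becomes the first field of every index entry: the byte offset of the revision data sits in the high bits and the 16 storage flags in the low bits. A small sketch of the inverse operation (the same unpacking is done by the `start()` and `flags()` accessors defined further down):

    def unpack_offset_type(value):
        # hypothetical inverse of offset_type(): high bits are the data
        # offset, low 16 bits are the revision storage flags
        return value >> 16, value & 0xFFFF

    # offset_type(1024, 0) == 1024 << 16
    # unpack_offset_type(offset_type(1024, 0)) == (1024, 0)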
143
143
144
144
145 def _verify_revision(rl, skipflags, state, node):
145 def _verify_revision(rl, skipflags, state, node):
146 """Verify the integrity of the given revlog ``node`` while providing a hook
146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 point for extensions to influence the operation."""
147 point for extensions to influence the operation."""
148 if skipflags:
148 if skipflags:
149 state[b'skipread'].add(node)
149 state[b'skipread'].add(node)
150 else:
150 else:
151 # Side-effect: read content and verify hash.
151 # Side-effect: read content and verify hash.
152 rl.revision(node)
152 rl.revision(node)
153
153
154
154
155 # True if a fast implementation for persistent-nodemap is available
155 # True if a fast implementation for persistent-nodemap is available
156 #
156 #
157 # We also consider the "pure" python implementation to be "fast", because
157 # We also consider the "pure" python implementation to be "fast", because
158 # people using pure don't really have performance considerations (and a
158 # people using pure don't really have performance considerations (and a
159 # wheelbarrow of other slowness sources)
159 # wheelbarrow of other slowness sources)
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 parsers, 'BaseIndexObject'
161 parsers, 'BaseIndexObject'
162 )
162 )
163
163
164
164
165 @attr.s(slots=True, frozen=True)
165 @attr.s(slots=True, frozen=True)
166 class _revisioninfo(object):
166 class _revisioninfo(object):
167 """Information about a revision that allows building its fulltext
167 """Information about a revision that allows building its fulltext
168 node: expected hash of the revision
168 node: expected hash of the revision
169 p1, p2: parent revs of the revision
169 p1, p2: parent revs of the revision
170 btext: built text cache consisting of a one-element list
170 btext: built text cache consisting of a one-element list
171 cachedelta: (baserev, uncompressed_delta) or None
171 cachedelta: (baserev, uncompressed_delta) or None
172 flags: flags associated with the revision storage
172 flags: flags associated with the revision storage
173
173
174 One of btext[0] or cachedelta must be set.
174 One of btext[0] or cachedelta must be set.
175 """
175 """
176
176
177 node = attr.ib()
177 node = attr.ib()
178 p1 = attr.ib()
178 p1 = attr.ib()
179 p2 = attr.ib()
179 p2 = attr.ib()
180 btext = attr.ib()
180 btext = attr.ib()
181 textlen = attr.ib()
181 textlen = attr.ib()
182 cachedelta = attr.ib()
182 cachedelta = attr.ib()
183 flags = attr.ib()
183 flags = attr.ib()
184
184
185
185
186 @interfaceutil.implementer(repository.irevisiondelta)
186 @interfaceutil.implementer(repository.irevisiondelta)
187 @attr.s(slots=True)
187 @attr.s(slots=True)
188 class revlogrevisiondelta(object):
188 class revlogrevisiondelta(object):
189 node = attr.ib()
189 node = attr.ib()
190 p1node = attr.ib()
190 p1node = attr.ib()
191 p2node = attr.ib()
191 p2node = attr.ib()
192 basenode = attr.ib()
192 basenode = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194 baserevisionsize = attr.ib()
194 baserevisionsize = attr.ib()
195 revision = attr.ib()
195 revision = attr.ib()
196 delta = attr.ib()
196 delta = attr.ib()
197 sidedata = attr.ib()
197 sidedata = attr.ib()
198 protocol_flags = attr.ib()
198 protocol_flags = attr.ib()
199 linknode = attr.ib(default=None)
199 linknode = attr.ib(default=None)
200
200
201
201
202 @interfaceutil.implementer(repository.iverifyproblem)
202 @interfaceutil.implementer(repository.iverifyproblem)
203 @attr.s(frozen=True)
203 @attr.s(frozen=True)
204 class revlogproblem(object):
204 class revlogproblem(object):
205 warning = attr.ib(default=None)
205 warning = attr.ib(default=None)
206 error = attr.ib(default=None)
206 error = attr.ib(default=None)
207 node = attr.ib(default=None)
207 node = attr.ib(default=None)
208
208
209
209
210 def parse_index_v1(data, inline):
210 def parse_index_v1(data, inline):
211 # call the C implementation to parse the index data
211 # call the C implementation to parse the index data
212 index, cache = parsers.parse_index2(data, inline)
212 index, cache = parsers.parse_index2(data, inline)
213 return index, cache
213 return index, cache
214
214
215
215
216 def parse_index_v2(data, inline):
216 def parse_index_v2(data, inline):
217 # call the C implementation to parse the index data
217 # call the C implementation to parse the index data
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 return index, cache
219 return index, cache
220
220
221
221
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223
223
224 def parse_index_v1_nodemap(data, inline):
224 def parse_index_v1_nodemap(data, inline):
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 return index, cache
226 return index, cache
227
227
228
228
229 else:
229 else:
230 parse_index_v1_nodemap = None
230 parse_index_v1_nodemap = None
231
231
232
232
233 def parse_index_v1_mixed(data, inline):
233 def parse_index_v1_mixed(data, inline):
234 index, cache = parse_index_v1(data, inline)
234 index, cache = parse_index_v1(data, inline)
235 return rustrevlog.MixedIndex(index), cache
235 return rustrevlog.MixedIndex(index), cache
236
236
237
237
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 # signed integer)
239 # signed integer)
240 _maxentrysize = 0x7FFFFFFF
240 _maxentrysize = 0x7FFFFFFF
241
241
242
242
243 class revlog(object):
243 class revlog(object):
244 """
244 """
245 the underlying revision storage object
245 the underlying revision storage object
246
246
247 A revlog consists of two parts, an index and the revision data.
247 A revlog consists of two parts, an index and the revision data.
248
248
249 The index is a file with a fixed record size containing
249 The index is a file with a fixed record size containing
250 information on each revision, including its nodeid (hash), the
250 information on each revision, including its nodeid (hash), the
251 nodeids of its parents, the position and offset of its data within
251 nodeids of its parents, the position and offset of its data within
252 the data file, and the revision it's based on. Finally, each entry
252 the data file, and the revision it's based on. Finally, each entry
253 contains a linkrev entry that can serve as a pointer to external
253 contains a linkrev entry that can serve as a pointer to external
254 data.
254 data.
255
255
256 The revision data itself is a linear collection of data chunks.
256 The revision data itself is a linear collection of data chunks.
257 Each chunk represents a revision and is usually represented as a
257 Each chunk represents a revision and is usually represented as a
258 delta against the previous chunk. To bound lookup time, runs of
258 delta against the previous chunk. To bound lookup time, runs of
259 deltas are limited to about 2 times the length of the original
259 deltas are limited to about 2 times the length of the original
260 version data. This makes retrieval of a version proportional to
260 version data. This makes retrieval of a version proportional to
261 its size, or O(1) relative to the number of revisions.
261 its size, or O(1) relative to the number of revisions.
262
262
263 Both pieces of the revlog are written to in an append-only
263 Both pieces of the revlog are written to in an append-only
264 fashion, which means we never need to rewrite a file to insert or
264 fashion, which means we never need to rewrite a file to insert or
265 remove data, and can use some simple techniques to avoid the need
265 remove data, and can use some simple techniques to avoid the need
266 for locking while reading.
266 for locking while reading.
267
267
268 If checkambig, indexfile is opened with checkambig=True at
268 If checkambig, indexfile is opened with checkambig=True at
269 writing, to avoid file stat ambiguity.
269 writing, to avoid file stat ambiguity.
270
270
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 index will be mmapped rather than read if it is larger than the
272 index will be mmapped rather than read if it is larger than the
273 configured threshold.
273 configured threshold.
274
274
275 If censorable is True, the revlog can have censored revisions.
275 If censorable is True, the revlog can have censored revisions.
276
276
277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 compression for the data content.
278 compression for the data content.
279
279
280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 file handle, a filename, and an expected position. It should check whether
281 file handle, a filename, and an expected position. It should check whether
282 the current position in the file handle is valid, and log/warn/fail (by
282 the current position in the file handle is valid, and log/warn/fail (by
283 raising).
283 raising).
284 """
284 """
285
285
286 _flagserrorclass = error.RevlogError
286 _flagserrorclass = error.RevlogError
287
287
288 def __init__(
288 def __init__(
289 self,
289 self,
290 opener,
290 opener,
291 target,
291 target,
292 radix,
292 radix,
293 postfix=None,
293 postfix=None,
294 checkambig=False,
294 checkambig=False,
295 mmaplargeindex=False,
295 mmaplargeindex=False,
296 censorable=False,
296 censorable=False,
297 upperboundcomp=None,
297 upperboundcomp=None,
298 persistentnodemap=False,
298 persistentnodemap=False,
299 concurrencychecker=None,
299 concurrencychecker=None,
300 ):
300 ):
301 """
301 """
302 create a revlog object
302 create a revlog object
303
303
304 opener is a function that abstracts the file opening operation
304 opener is a function that abstracts the file opening operation
305 and can be used to implement COW semantics or the like.
305 and can be used to implement COW semantics or the like.
306
306
307 `target`: a (KIND, ID) tuple that identifies the content stored in
307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 this revlog. It helps the rest of the code understand what the revlog
308 this revlog. It helps the rest of the code understand what the revlog
309 is about without having to resort to heuristics or index filename
309 is about without having to resort to heuristics or index filename
310 analysis. Note that this must reliably be set by normal code, but
310 analysis. Note that this must reliably be set by normal code, but
311 that test, debug, or performance measurement code might not set this to
311 that test, debug, or performance measurement code might not set this to
312 an accurate value.
312 an accurate value.
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315
315
316 self.radix = radix
316 self.radix = radix
317
317
318 self._indexfile = None
318 self._indexfile = None
319 self._datafile = None
319 self._datafile = None
320 self._nodemap_file = None
320 self._nodemap_file = None
321 self.postfix = postfix
321 self.postfix = postfix
322 self.opener = opener
322 self.opener = opener
323 if persistentnodemap:
323 if persistentnodemap:
324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
324 self._nodemap_file = nodemaputil.get_nodemap_file(self)
325
325
326 assert target[0] in ALL_KINDS
326 assert target[0] in ALL_KINDS
327 assert len(target) == 2
327 assert len(target) == 2
328 self.target = target
328 self.target = target
329 # When True, indexfile is opened with checkambig=True at writing, to
329 # When True, indexfile is opened with checkambig=True at writing, to
330 # avoid file stat ambiguity.
330 # avoid file stat ambiguity.
331 self._checkambig = checkambig
331 self._checkambig = checkambig
332 self._mmaplargeindex = mmaplargeindex
332 self._mmaplargeindex = mmaplargeindex
333 self._censorable = censorable
333 self._censorable = censorable
334 # 3-tuple of (node, rev, text) for a raw revision.
334 # 3-tuple of (node, rev, text) for a raw revision.
335 self._revisioncache = None
335 self._revisioncache = None
336 # Maps rev to chain base rev.
336 # Maps rev to chain base rev.
337 self._chainbasecache = util.lrucachedict(100)
337 self._chainbasecache = util.lrucachedict(100)
338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
338 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
339 self._chunkcache = (0, b'')
339 self._chunkcache = (0, b'')
340 # How much data to read and cache into the raw revlog data cache.
340 # How much data to read and cache into the raw revlog data cache.
341 self._chunkcachesize = 65536
341 self._chunkcachesize = 65536
342 self._maxchainlen = None
342 self._maxchainlen = None
343 self._deltabothparents = True
343 self._deltabothparents = True
344 self.index = None
344 self.index = None
345 self._nodemap_docket = None
345 self._nodemap_docket = None
346 # Mapping of partial identifiers to full nodes.
346 # Mapping of partial identifiers to full nodes.
347 self._pcache = {}
347 self._pcache = {}
348 # Mapping of revision integer to full node.
348 # Mapping of revision integer to full node.
349 self._compengine = b'zlib'
349 self._compengine = b'zlib'
350 self._compengineopts = {}
350 self._compengineopts = {}
351 self._maxdeltachainspan = -1
351 self._maxdeltachainspan = -1
352 self._withsparseread = False
352 self._withsparseread = False
353 self._sparserevlog = False
353 self._sparserevlog = False
354 self._srdensitythreshold = 0.50
354 self._srdensitythreshold = 0.50
355 self._srmingapsize = 262144
355 self._srmingapsize = 262144
356
356
357 # Make copy of flag processors so each revlog instance can support
357 # Make copy of flag processors so each revlog instance can support
358 # custom flags.
358 # custom flags.
359 self._flagprocessors = dict(flagutil.flagprocessors)
359 self._flagprocessors = dict(flagutil.flagprocessors)
360
360
361 # 2-tuple of file handles being used for active writing.
361 # 2-tuple of file handles being used for active writing.
362 self._writinghandles = None
362 self._writinghandles = None
363
363
364 self._loadindex()
364 self._loadindex()
365
365
366 self._concurrencychecker = concurrencychecker
366 self._concurrencychecker = concurrencychecker
367
367
368 def _init_opts(self):
368 def _init_opts(self):
369 """process options (from above/config) to setup associated default revlog mode
369 """process options (from above/config) to setup associated default revlog mode
370
370
371 These values might be affected when actually reading on disk information.
371 These values might be affected when actually reading on disk information.
372
372
373 The relevant values are returned for use in _loadindex().
373 The relevant values are returned for use in _loadindex().
374
374
375 * newversionflags:
375 * newversionflags:
376 version header to use if we need to create a new revlog
376 version header to use if we need to create a new revlog
377
377
378 * mmapindexthreshold:
378 * mmapindexthreshold:
379 minimal index size at which to start using mmap
379 minimal index size at which to start using mmap
380
380
381 * force_nodemap:
381 * force_nodemap:
382 force the usage of a "development" version of the nodemap code
382 force the usage of a "development" version of the nodemap code
383 """
383 """
384 mmapindexthreshold = None
384 mmapindexthreshold = None
385 opts = self.opener.options
385 opts = self.opener.options
386
386
387 if b'revlogv2' in opts:
387 if b'revlogv2' in opts:
388 new_header = REVLOGV2 | FLAG_INLINE_DATA
388 new_header = REVLOGV2 | FLAG_INLINE_DATA
389 elif b'revlogv1' in opts:
389 elif b'revlogv1' in opts:
390 new_header = REVLOGV1 | FLAG_INLINE_DATA
390 new_header = REVLOGV1 | FLAG_INLINE_DATA
391 if b'generaldelta' in opts:
391 if b'generaldelta' in opts:
392 new_header |= FLAG_GENERALDELTA
392 new_header |= FLAG_GENERALDELTA
393 elif b'revlogv0' in self.opener.options:
393 elif b'revlogv0' in self.opener.options:
394 new_header = REVLOGV0
394 new_header = REVLOGV0
395 else:
395 else:
396 new_header = REVLOG_DEFAULT_VERSION
396 new_header = REVLOG_DEFAULT_VERSION
397
397
398 if b'chunkcachesize' in opts:
398 if b'chunkcachesize' in opts:
399 self._chunkcachesize = opts[b'chunkcachesize']
399 self._chunkcachesize = opts[b'chunkcachesize']
400 if b'maxchainlen' in opts:
400 if b'maxchainlen' in opts:
401 self._maxchainlen = opts[b'maxchainlen']
401 self._maxchainlen = opts[b'maxchainlen']
402 if b'deltabothparents' in opts:
402 if b'deltabothparents' in opts:
403 self._deltabothparents = opts[b'deltabothparents']
403 self._deltabothparents = opts[b'deltabothparents']
404 self._lazydelta = bool(opts.get(b'lazydelta', True))
404 self._lazydelta = bool(opts.get(b'lazydelta', True))
405 self._lazydeltabase = False
405 self._lazydeltabase = False
406 if self._lazydelta:
406 if self._lazydelta:
407 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
407 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
408 if b'compengine' in opts:
408 if b'compengine' in opts:
409 self._compengine = opts[b'compengine']
409 self._compengine = opts[b'compengine']
410 if b'zlib.level' in opts:
410 if b'zlib.level' in opts:
411 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
411 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
412 if b'zstd.level' in opts:
412 if b'zstd.level' in opts:
413 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
413 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
414 if b'maxdeltachainspan' in opts:
414 if b'maxdeltachainspan' in opts:
415 self._maxdeltachainspan = opts[b'maxdeltachainspan']
415 self._maxdeltachainspan = opts[b'maxdeltachainspan']
416 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
416 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
417 mmapindexthreshold = opts[b'mmapindexthreshold']
417 mmapindexthreshold = opts[b'mmapindexthreshold']
418 self.hassidedata = bool(opts.get(b'side-data', False))
418 self.hassidedata = bool(opts.get(b'side-data', False))
419 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
419 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
420 withsparseread = bool(opts.get(b'with-sparse-read', False))
420 withsparseread = bool(opts.get(b'with-sparse-read', False))
421 # sparse-revlog forces sparse-read
421 # sparse-revlog forces sparse-read
422 self._withsparseread = self._sparserevlog or withsparseread
422 self._withsparseread = self._sparserevlog or withsparseread
423 if b'sparse-read-density-threshold' in opts:
423 if b'sparse-read-density-threshold' in opts:
424 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
424 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
425 if b'sparse-read-min-gap-size' in opts:
425 if b'sparse-read-min-gap-size' in opts:
426 self._srmingapsize = opts[b'sparse-read-min-gap-size']
426 self._srmingapsize = opts[b'sparse-read-min-gap-size']
427 if opts.get(b'enableellipsis'):
427 if opts.get(b'enableellipsis'):
428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
429
429
430 # revlog v0 doesn't have flag processors
430 # revlog v0 doesn't have flag processors
431 for flag, processor in pycompat.iteritems(
431 for flag, processor in pycompat.iteritems(
432 opts.get(b'flagprocessors', {})
432 opts.get(b'flagprocessors', {})
433 ):
433 ):
434 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
434 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
435
435
436 if self._chunkcachesize <= 0:
436 if self._chunkcachesize <= 0:
437 raise error.RevlogError(
437 raise error.RevlogError(
438 _(b'revlog chunk cache size %r is not greater than 0')
438 _(b'revlog chunk cache size %r is not greater than 0')
439 % self._chunkcachesize
439 % self._chunkcachesize
440 )
440 )
441 elif self._chunkcachesize & (self._chunkcachesize - 1):
441 elif self._chunkcachesize & (self._chunkcachesize - 1):
442 raise error.RevlogError(
442 raise error.RevlogError(
443 _(b'revlog chunk cache size %r is not a power of 2')
443 _(b'revlog chunk cache size %r is not a power of 2')
444 % self._chunkcachesize
444 % self._chunkcachesize
445 )
445 )
446 force_nodemap = opts.get(b'devel-force-nodemap', False)
446 force_nodemap = opts.get(b'devel-force-nodemap', False)
447 return new_header, mmapindexthreshold, force_nodemap
447 return new_header, mmapindexthreshold, force_nodemap
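For illustration, the version header computed here packs the on-disk format version into the low 16 bits and the feature flags into the high bits, using the constants imported at the top of the module (a sketch, not part of the source):

    # a new v1 revlog with inline data and general delta would use:
    header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    assert header & 0xFFFF == REVLOGV1                               # version
    assert header & ~0xFFFF == FLAG_INLINE_DATA | FLAG_GENERALDELTA  # flags

_loadindex() below splits a header read from disk apart in exactly this way (`header & ~0xFFFF` for the flags, `header & 0xFFFF` for the version).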
448
448
449 def _get_data(self, filepath, mmap_threshold):
449 def _get_data(self, filepath, mmap_threshold):
450 """return a file content with or without mmap
450 """return a file content with or without mmap
451
451
452 If the file is missing return the empty string"""
452 If the file is missing return the empty string"""
453 try:
453 try:
454 with self.opener(filepath) as fp:
454 with self.opener(filepath) as fp:
455 if mmap_threshold is not None:
455 if mmap_threshold is not None:
456 file_size = self.opener.fstat(fp).st_size
456 file_size = self.opener.fstat(fp).st_size
457 if file_size >= mmap_threshold:
457 if file_size >= mmap_threshold:
458 # TODO: should .close() to release resources without
458 # TODO: should .close() to release resources without
459 # relying on Python GC
459 # relying on Python GC
460 return util.buffer(util.mmapread(fp))
460 return util.buffer(util.mmapread(fp))
461 return fp.read()
461 return fp.read()
462 except IOError as inst:
462 except IOError as inst:
463 if inst.errno != errno.ENOENT:
463 if inst.errno != errno.ENOENT:
464 raise
464 raise
465 return b''
465 return b''
466
466
467 def _loadindex(self):
467 def _loadindex(self):
468
468
469 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
469 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
470
470
471 if self.postfix is None:
471 if self.postfix is None:
472 index_file = b'%s.i' % self.radix
472 entry_point = b'%s.i' % self.radix
473 data_file = b'%s.d' % self.radix
474 elif self.postfix == b'a':
475 index_file = b'%s.i.a' % self.radix
476 data_file = b'%s.d' % self.radix
477 else:
473 else:
478 index_file = b'%s.i.%s' % (self.radix, self.postfix)
474 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
479 data_file = b'%s.d.%s' % (self.radix, self.postfix)
480
481 self._indexfile = index_file
482 self._datafile = data_file
483
475
484 indexdata = b''
476 indexdata = b''
485 self._initempty = True
477 self._initempty = True
486 indexdata = self._get_data(self._indexfile, mmapindexthreshold)
478 indexdata = self._get_data(entry_point, mmapindexthreshold)
487 if len(indexdata) > 0:
479 if len(indexdata) > 0:
488 header = INDEX_HEADER.unpack(indexdata[:4])[0]
480 header = INDEX_HEADER.unpack(indexdata[:4])[0]
489 self._initempty = False
481 self._initempty = False
490 else:
482 else:
491 header = new_header
483 header = new_header
492
484
493 self._format_flags = header & ~0xFFFF
485 self._format_flags = header & ~0xFFFF
494 self._format_version = header & 0xFFFF
486 self._format_version = header & 0xFFFF
495
487
496 if self._format_version == REVLOGV0:
488 if self._format_version == REVLOGV0:
497 if self._format_flags:
489 if self._format_flags:
498 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
490 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
499 display_flag = self._format_flags >> 16
491 display_flag = self._format_flags >> 16
500 msg %= (display_flag, self._format_version, self.display_id)
492 msg %= (display_flag, self._format_version, self.display_id)
501 raise error.RevlogError(msg)
493 raise error.RevlogError(msg)
502
494
503 self._inline = False
495 self._inline = False
504 self._generaldelta = False
496 self._generaldelta = False
505
497
506 elif self._format_version == REVLOGV1:
498 elif self._format_version == REVLOGV1:
507 if self._format_flags & ~REVLOGV1_FLAGS:
499 if self._format_flags & ~REVLOGV1_FLAGS:
508 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
500 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
509 display_flag = self._format_flags >> 16
501 display_flag = self._format_flags >> 16
510 msg %= (display_flag, self._format_version, self.display_id)
502 msg %= (display_flag, self._format_version, self.display_id)
511 raise error.RevlogError(msg)
503 raise error.RevlogError(msg)
512
504
513 self._inline = self._format_flags & FLAG_INLINE_DATA
505 self._inline = self._format_flags & FLAG_INLINE_DATA
514 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
506 self._generaldelta = self._format_flags & FLAG_GENERALDELTA
515
507
516 elif self._format_version == REVLOGV2:
508 elif self._format_version == REVLOGV2:
517 if self._format_flags & ~REVLOGV2_FLAGS:
509 if self._format_flags & ~REVLOGV2_FLAGS:
518 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
510 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
519 display_flag = self._format_flags >> 16
511 display_flag = self._format_flags >> 16
520 msg %= (display_flag, self._format_version, self.display_id)
512 msg %= (display_flag, self._format_version, self.display_id)
521 raise error.RevlogError(msg)
513 raise error.RevlogError(msg)
522
514
523 # There is a bug in the transaction handling when going from an
515 # There is a bug in the transaction handling when going from an
524 # inline revlog to a separate index and data file. Turn it off until
516 # inline revlog to a separate index and data file. Turn it off until
525 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
517 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
526 # See issue6485
518 # See issue6485
527 self._inline = False
519 self._inline = False
528 # generaldelta implied by version 2 revlogs.
520 # generaldelta implied by version 2 revlogs.
529 self._generaldelta = True
521 self._generaldelta = True
530
522
531 else:
523 else:
532 msg = _(b'unknown version (%d) in revlog %s')
524 msg = _(b'unknown version (%d) in revlog %s')
533 msg %= (self._format_version, self.display_id)
525 msg %= (self._format_version, self.display_id)
534 raise error.RevlogError(msg)
526 raise error.RevlogError(msg)
535
527
528 self._indexfile = entry_point
529
530 if self.postfix is None or self.postfix == b'a':
531 self._datafile = b'%s.d' % self.radix
532 else:
533 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
534
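The resulting file names, for a couple of hypothetical radix/postfix values (a helper written purely for illustration, mirroring the logic above):

    def _revlog_file_names(radix, postfix):
        # hypothetical helper: derive the entry point (index) and data
        # file names the same way _loadindex() does
        if postfix is None:
            entry_point = b'%s.i' % radix
        else:
            entry_point = b'%s.i.%s' % (radix, postfix)
        if postfix is None or postfix == b'a':
            data_file = b'%s.d' % radix
        else:
            data_file = b'%s.d.%s' % (radix, postfix)
        return entry_point, data_file

    # _revlog_file_names(b'data/foo.txt', None) -> (b'data/foo.txt.i', b'data/foo.txt.d')
    # _revlog_file_names(b'data/foo.txt', b'a') -> (b'data/foo.txt.i.a', b'data/foo.txt.d')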
536 self.nodeconstants = sha1nodeconstants
535 self.nodeconstants = sha1nodeconstants
537 self.nullid = self.nodeconstants.nullid
536 self.nullid = self.nodeconstants.nullid
538
537
539 # sparse-revlog can't be on without general-delta (issue6056)
538 # sparse-revlog can't be on without general-delta (issue6056)
540 if not self._generaldelta:
539 if not self._generaldelta:
541 self._sparserevlog = False
540 self._sparserevlog = False
542
541
543 self._storedeltachains = True
542 self._storedeltachains = True
544
543
545 devel_nodemap = (
544 devel_nodemap = (
546 self._nodemap_file
545 self._nodemap_file
547 and force_nodemap
546 and force_nodemap
548 and parse_index_v1_nodemap is not None
547 and parse_index_v1_nodemap is not None
549 )
548 )
550
549
551 use_rust_index = False
550 use_rust_index = False
552 if rustrevlog is not None:
551 if rustrevlog is not None:
553 if self._nodemap_file is not None:
552 if self._nodemap_file is not None:
554 use_rust_index = True
553 use_rust_index = True
555 else:
554 else:
556 use_rust_index = self.opener.options.get(b'rust.index')
555 use_rust_index = self.opener.options.get(b'rust.index')
557
556
558 self._parse_index = parse_index_v1
557 self._parse_index = parse_index_v1
559 if self._format_version == REVLOGV0:
558 if self._format_version == REVLOGV0:
560 self._parse_index = revlogv0.parse_index_v0
559 self._parse_index = revlogv0.parse_index_v0
561 elif self._format_version == REVLOGV2:
560 elif self._format_version == REVLOGV2:
562 self._parse_index = parse_index_v2
561 self._parse_index = parse_index_v2
563 elif devel_nodemap:
562 elif devel_nodemap:
564 self._parse_index = parse_index_v1_nodemap
563 self._parse_index = parse_index_v1_nodemap
565 elif use_rust_index:
564 elif use_rust_index:
566 self._parse_index = parse_index_v1_mixed
565 self._parse_index = parse_index_v1_mixed
567 try:
566 try:
568 d = self._parse_index(indexdata, self._inline)
567 d = self._parse_index(indexdata, self._inline)
569 index, _chunkcache = d
568 index, _chunkcache = d
570 use_nodemap = (
569 use_nodemap = (
571 not self._inline
570 not self._inline
572 and self._nodemap_file is not None
571 and self._nodemap_file is not None
573 and util.safehasattr(index, 'update_nodemap_data')
572 and util.safehasattr(index, 'update_nodemap_data')
574 )
573 )
575 if use_nodemap:
574 if use_nodemap:
576 nodemap_data = nodemaputil.persisted_data(self)
575 nodemap_data = nodemaputil.persisted_data(self)
577 if nodemap_data is not None:
576 if nodemap_data is not None:
578 docket = nodemap_data[0]
577 docket = nodemap_data[0]
579 if (
578 if (
580 len(d[0]) > docket.tip_rev
579 len(d[0]) > docket.tip_rev
581 and d[0][docket.tip_rev][7] == docket.tip_node
580 and d[0][docket.tip_rev][7] == docket.tip_node
582 ):
581 ):
583 # no changelog tampering
582 # no changelog tampering
584 self._nodemap_docket = docket
583 self._nodemap_docket = docket
585 index.update_nodemap_data(*nodemap_data)
584 index.update_nodemap_data(*nodemap_data)
586 except (ValueError, IndexError):
585 except (ValueError, IndexError):
587 raise error.RevlogError(
586 raise error.RevlogError(
588 _(b"index %s is corrupted") % self.display_id
587 _(b"index %s is corrupted") % self.display_id
589 )
588 )
590 self.index, self._chunkcache = d
589 self.index, self._chunkcache = d
591 if not self._chunkcache:
590 if not self._chunkcache:
592 self._chunkclear()
591 self._chunkclear()
593 # revnum -> (chain-length, sum-delta-length)
592 # revnum -> (chain-length, sum-delta-length)
594 self._chaininfocache = util.lrucachedict(500)
593 self._chaininfocache = util.lrucachedict(500)
595 # revlog header -> revlog compressor
594 # revlog header -> revlog compressor
596 self._decompressors = {}
595 self._decompressors = {}
597
596
598 @util.propertycache
597 @util.propertycache
599 def revlog_kind(self):
598 def revlog_kind(self):
600 return self.target[0]
599 return self.target[0]
601
600
602 @util.propertycache
601 @util.propertycache
603 def display_id(self):
602 def display_id(self):
604 """The public facing "ID" of the revlog that we use in message"""
603 """The public facing "ID" of the revlog that we use in message"""
605 # Maybe we should build a user facing representation of
604 # Maybe we should build a user facing representation of
606 # revlog.target instead of using `self.radix`
605 # revlog.target instead of using `self.radix`
607 return self.radix
606 return self.radix
608
607
609 @util.propertycache
608 @util.propertycache
610 def _compressor(self):
609 def _compressor(self):
611 engine = util.compengines[self._compengine]
610 engine = util.compengines[self._compengine]
612 return engine.revlogcompressor(self._compengineopts)
611 return engine.revlogcompressor(self._compengineopts)
613
612
614 def _indexfp(self, mode=b'r'):
613 def _indexfp(self, mode=b'r'):
615 """file object for the revlog's index file"""
614 """file object for the revlog's index file"""
616 args = {'mode': mode}
615 args = {'mode': mode}
617 if mode != b'r':
616 if mode != b'r':
618 args['checkambig'] = self._checkambig
617 args['checkambig'] = self._checkambig
619 if mode == b'w':
618 if mode == b'w':
620 args['atomictemp'] = True
619 args['atomictemp'] = True
621 return self.opener(self._indexfile, **args)
620 return self.opener(self._indexfile, **args)
622
621
623 def _datafp(self, mode=b'r'):
622 def _datafp(self, mode=b'r'):
624 """file object for the revlog's data file"""
623 """file object for the revlog's data file"""
625 return self.opener(self._datafile, mode=mode)
624 return self.opener(self._datafile, mode=mode)
626
625
627 @contextlib.contextmanager
626 @contextlib.contextmanager
628 def _datareadfp(self, existingfp=None):
627 def _datareadfp(self, existingfp=None):
629 """file object suitable to read data"""
628 """file object suitable to read data"""
630 # Use explicit file handle, if given.
629 # Use explicit file handle, if given.
631 if existingfp is not None:
630 if existingfp is not None:
632 yield existingfp
631 yield existingfp
633
632
634 # Use a file handle being actively used for writes, if available.
633 # Use a file handle being actively used for writes, if available.
635 # There is some danger to doing this because reads will seek the
634 # There is some danger to doing this because reads will seek the
636 # file. However, _writeentry() performs a SEEK_END before all writes,
635 # file. However, _writeentry() performs a SEEK_END before all writes,
637 # so we should be safe.
636 # so we should be safe.
638 elif self._writinghandles:
637 elif self._writinghandles:
639 if self._inline:
638 if self._inline:
640 yield self._writinghandles[0]
639 yield self._writinghandles[0]
641 else:
640 else:
642 yield self._writinghandles[1]
641 yield self._writinghandles[1]
643
642
644 # Otherwise open a new file handle.
643 # Otherwise open a new file handle.
645 else:
644 else:
646 if self._inline:
645 if self._inline:
647 func = self._indexfp
646 func = self._indexfp
648 else:
647 else:
649 func = self._datafp
648 func = self._datafp
650 with func() as fp:
649 with func() as fp:
651 yield fp
650 yield fp
652
651
653 def tiprev(self):
652 def tiprev(self):
654 return len(self.index) - 1
653 return len(self.index) - 1
655
654
656 def tip(self):
655 def tip(self):
657 return self.node(self.tiprev())
656 return self.node(self.tiprev())
658
657
659 def __contains__(self, rev):
658 def __contains__(self, rev):
660 return 0 <= rev < len(self)
659 return 0 <= rev < len(self)
661
660
662 def __len__(self):
661 def __len__(self):
663 return len(self.index)
662 return len(self.index)
664
663
665 def __iter__(self):
664 def __iter__(self):
666 return iter(pycompat.xrange(len(self)))
665 return iter(pycompat.xrange(len(self)))
667
666
668 def revs(self, start=0, stop=None):
667 def revs(self, start=0, stop=None):
669 """iterate over all rev in this revlog (from start to stop)"""
668 """iterate over all rev in this revlog (from start to stop)"""
670 return storageutil.iterrevs(len(self), start=start, stop=stop)
669 return storageutil.iterrevs(len(self), start=start, stop=stop)
671
670
672 @property
671 @property
673 def nodemap(self):
672 def nodemap(self):
674 msg = (
673 msg = (
675 b"revlog.nodemap is deprecated, "
674 b"revlog.nodemap is deprecated, "
676 b"use revlog.index.[has_node|rev|get_rev]"
675 b"use revlog.index.[has_node|rev|get_rev]"
677 )
676 )
678 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
677 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
679 return self.index.nodemap
678 return self.index.nodemap
680
679
681 @property
680 @property
682 def _nodecache(self):
681 def _nodecache(self):
683 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
682 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
684 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
683 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
685 return self.index.nodemap
684 return self.index.nodemap
686
685
687 def hasnode(self, node):
686 def hasnode(self, node):
688 try:
687 try:
689 self.rev(node)
688 self.rev(node)
690 return True
689 return True
691 except KeyError:
690 except KeyError:
692 return False
691 return False
693
692
694 def candelta(self, baserev, rev):
693 def candelta(self, baserev, rev):
695 """whether two revisions (baserev, rev) can be delta-ed or not"""
694 """whether two revisions (baserev, rev) can be delta-ed or not"""
696 # Disable delta if either rev requires a content-changing flag
695 # Disable delta if either rev requires a content-changing flag
697 # processor (ex. LFS). This is because such a flag processor can alter
696 # processor (ex. LFS). This is because such a flag processor can alter
698 # the rawtext content that the delta will be based on, and two clients
697 # the rawtext content that the delta will be based on, and two clients
699 # could have the same revlog node with different flags (i.e. different
698 # could have the same revlog node with different flags (i.e. different
700 # rawtext contents) and the delta could be incompatible.
699 # rawtext contents) and the delta could be incompatible.
701 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
700 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
702 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
701 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
703 ):
702 ):
704 return False
703 return False
705 return True
704 return True
706
705
707 def update_caches(self, transaction):
706 def update_caches(self, transaction):
708 if self._nodemap_file is not None:
707 if self._nodemap_file is not None:
709 if transaction is None:
708 if transaction is None:
710 nodemaputil.update_persistent_nodemap(self)
709 nodemaputil.update_persistent_nodemap(self)
711 else:
710 else:
712 nodemaputil.setup_persistent_nodemap(transaction, self)
711 nodemaputil.setup_persistent_nodemap(transaction, self)
713
712
714 def clearcaches(self):
713 def clearcaches(self):
715 self._revisioncache = None
714 self._revisioncache = None
716 self._chainbasecache.clear()
715 self._chainbasecache.clear()
717 self._chunkcache = (0, b'')
716 self._chunkcache = (0, b'')
718 self._pcache = {}
717 self._pcache = {}
719 self._nodemap_docket = None
718 self._nodemap_docket = None
720 self.index.clearcaches()
719 self.index.clearcaches()
721 # The python code is the one responsible for validating the docket, so
720 # The python code is the one responsible for validating the docket, so
722 # we end up having to refresh it here.
721 # we end up having to refresh it here.
723 use_nodemap = (
722 use_nodemap = (
724 not self._inline
723 not self._inline
725 and self._nodemap_file is not None
724 and self._nodemap_file is not None
726 and util.safehasattr(self.index, 'update_nodemap_data')
725 and util.safehasattr(self.index, 'update_nodemap_data')
727 )
726 )
728 if use_nodemap:
727 if use_nodemap:
729 nodemap_data = nodemaputil.persisted_data(self)
728 nodemap_data = nodemaputil.persisted_data(self)
730 if nodemap_data is not None:
729 if nodemap_data is not None:
731 self._nodemap_docket = nodemap_data[0]
730 self._nodemap_docket = nodemap_data[0]
732 self.index.update_nodemap_data(*nodemap_data)
731 self.index.update_nodemap_data(*nodemap_data)
733
732
734 def rev(self, node):
733 def rev(self, node):
735 try:
734 try:
736 return self.index.rev(node)
735 return self.index.rev(node)
737 except TypeError:
736 except TypeError:
738 raise
737 raise
739 except error.RevlogError:
738 except error.RevlogError:
740 # parsers.c radix tree lookup failed
739 # parsers.c radix tree lookup failed
741 if (
740 if (
742 node == self.nodeconstants.wdirid
741 node == self.nodeconstants.wdirid
743 or node in self.nodeconstants.wdirfilenodeids
742 or node in self.nodeconstants.wdirfilenodeids
744 ):
743 ):
745 raise error.WdirUnsupported
744 raise error.WdirUnsupported
746 raise error.LookupError(node, self.display_id, _(b'no node'))
745 raise error.LookupError(node, self.display_id, _(b'no node'))
747
746
748 # Accessors for index entries.
747 # Accessors for index entries.
749
748
750 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
749 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
751 # are flags.
750 # are flags.
752 def start(self, rev):
751 def start(self, rev):
753 return int(self.index[rev][0] >> 16)
752 return int(self.index[rev][0] >> 16)
754
753
755 def flags(self, rev):
754 def flags(self, rev):
756 return self.index[rev][0] & 0xFFFF
755 return self.index[rev][0] & 0xFFFF
757
756
758 def length(self, rev):
757 def length(self, rev):
759 return self.index[rev][1]
758 return self.index[rev][1]
760
759
761 def sidedata_length(self, rev):
760 def sidedata_length(self, rev):
762 if not self.hassidedata:
761 if not self.hassidedata:
763 return 0
762 return 0
764 return self.index[rev][9]
763 return self.index[rev][9]
765
764
766 def rawsize(self, rev):
765 def rawsize(self, rev):
767 """return the length of the uncompressed text for a given revision"""
766 """return the length of the uncompressed text for a given revision"""
768 l = self.index[rev][2]
767 l = self.index[rev][2]
769 if l >= 0:
768 if l >= 0:
770 return l
769 return l
771
770
772 t = self.rawdata(rev)
771 t = self.rawdata(rev)
773 return len(t)
772 return len(t)
774
773
775 def size(self, rev):
774 def size(self, rev):
776 """length of non-raw text (processed by a "read" flag processor)"""
775 """length of non-raw text (processed by a "read" flag processor)"""
777 # fast path: if no "read" flag processor could change the content,
776 # fast path: if no "read" flag processor could change the content,
778 # size is rawsize. note: ELLIPSIS is known to not change the content.
777 # size is rawsize. note: ELLIPSIS is known to not change the content.
779 flags = self.flags(rev)
778 flags = self.flags(rev)
780 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
779 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
781 return self.rawsize(rev)
780 return self.rawsize(rev)
782
781
783 return len(self.revision(rev, raw=False))
782 return len(self.revision(rev, raw=False))
784
783
785 def chainbase(self, rev):
784 def chainbase(self, rev):
786 base = self._chainbasecache.get(rev)
785 base = self._chainbasecache.get(rev)
787 if base is not None:
786 if base is not None:
788 return base
787 return base
789
788
790 index = self.index
789 index = self.index
791 iterrev = rev
790 iterrev = rev
792 base = index[iterrev][3]
791 base = index[iterrev][3]
793 while base != iterrev:
792 while base != iterrev:
794 iterrev = base
793 iterrev = base
795 base = index[iterrev][3]
794 base = index[iterrev][3]
796
795
797 self._chainbasecache[rev] = base
796 self._chainbasecache[rev] = base
798 return base
797 return base
799
798
800 def linkrev(self, rev):
799 def linkrev(self, rev):
801 return self.index[rev][4]
800 return self.index[rev][4]
802
801
803 def parentrevs(self, rev):
802 def parentrevs(self, rev):
804 try:
803 try:
805 entry = self.index[rev]
804 entry = self.index[rev]
806 except IndexError:
805 except IndexError:
807 if rev == wdirrev:
806 if rev == wdirrev:
808 raise error.WdirUnsupported
807 raise error.WdirUnsupported
809 raise
808 raise
810 if entry[5] == nullrev:
809 if entry[5] == nullrev:
811 return entry[6], entry[5]
810 return entry[6], entry[5]
812 else:
811 else:
813 return entry[5], entry[6]
812 return entry[5], entry[6]
814
813
815 # fast parentrevs(rev) where rev isn't filtered
814 # fast parentrevs(rev) where rev isn't filtered
816 _uncheckedparentrevs = parentrevs
815 _uncheckedparentrevs = parentrevs
817
816
818 def node(self, rev):
817 def node(self, rev):
819 try:
818 try:
820 return self.index[rev][7]
819 return self.index[rev][7]
821 except IndexError:
820 except IndexError:
822 if rev == wdirrev:
821 if rev == wdirrev:
823 raise error.WdirUnsupported
822 raise error.WdirUnsupported
824 raise
823 raise
825
824
826 # Derived from index values.
825 # Derived from index values.
827
826
828 def end(self, rev):
827 def end(self, rev):
829 return self.start(rev) + self.length(rev)
828 return self.start(rev) + self.length(rev)
830
829
831 def parents(self, node):
830 def parents(self, node):
832 i = self.index
831 i = self.index
833 d = i[self.rev(node)]
832 d = i[self.rev(node)]
834 # inline node() to avoid function call overhead
833 # inline node() to avoid function call overhead
835 if d[5] == self.nullid:
834 if d[5] == self.nullid:
836 return i[d[6]][7], i[d[5]][7]
835 return i[d[6]][7], i[d[5]][7]
837 else:
836 else:
838 return i[d[5]][7], i[d[6]][7]
837 return i[d[5]][7], i[d[6]][7]
839
838
840 def chainlen(self, rev):
839 def chainlen(self, rev):
841 return self._chaininfo(rev)[0]
840 return self._chaininfo(rev)[0]
842
841
843 def _chaininfo(self, rev):
842 def _chaininfo(self, rev):
844 chaininfocache = self._chaininfocache
843 chaininfocache = self._chaininfocache
845 if rev in chaininfocache:
844 if rev in chaininfocache:
846 return chaininfocache[rev]
845 return chaininfocache[rev]
847 index = self.index
846 index = self.index
848 generaldelta = self._generaldelta
847 generaldelta = self._generaldelta
849 iterrev = rev
848 iterrev = rev
850 e = index[iterrev]
849 e = index[iterrev]
851 clen = 0
850 clen = 0
852 compresseddeltalen = 0
851 compresseddeltalen = 0
853 while iterrev != e[3]:
852 while iterrev != e[3]:
854 clen += 1
853 clen += 1
855 compresseddeltalen += e[1]
854 compresseddeltalen += e[1]
856 if generaldelta:
855 if generaldelta:
857 iterrev = e[3]
856 iterrev = e[3]
858 else:
857 else:
859 iterrev -= 1
858 iterrev -= 1
860 if iterrev in chaininfocache:
859 if iterrev in chaininfocache:
861 t = chaininfocache[iterrev]
860 t = chaininfocache[iterrev]
862 clen += t[0]
861 clen += t[0]
863 compresseddeltalen += t[1]
862 compresseddeltalen += t[1]
864 break
863 break
865 e = index[iterrev]
864 e = index[iterrev]
866 else:
865 else:
867 # Add text length of base since decompressing that also takes
866 # Add text length of base since decompressing that also takes
868 # work. For cache hits the length is already included.
867 # work. For cache hits the length is already included.
869 compresseddeltalen += e[1]
868 compresseddeltalen += e[1]
870 r = (clen, compresseddeltalen)
869 r = (clen, compresseddeltalen)
871 chaininfocache[rev] = r
870 chaininfocache[rev] = r
872 return r
871 return r
873
872
874 def _deltachain(self, rev, stoprev=None):
873 def _deltachain(self, rev, stoprev=None):
875 """Obtain the delta chain for a revision.
874 """Obtain the delta chain for a revision.
876
875
877 ``stoprev`` specifies a revision to stop at. If not specified, we
876 ``stoprev`` specifies a revision to stop at. If not specified, we
878 stop at the base of the chain.
877 stop at the base of the chain.
879
878
880 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
879 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
881 revs in ascending order and ``stopped`` is a bool indicating whether
880 revs in ascending order and ``stopped`` is a bool indicating whether
882 ``stoprev`` was hit.
881 ``stoprev`` was hit.
883 """
882 """
884 # Try C implementation.
883 # Try C implementation.
885 try:
884 try:
886 return self.index.deltachain(rev, stoprev, self._generaldelta)
885 return self.index.deltachain(rev, stoprev, self._generaldelta)
887 except AttributeError:
886 except AttributeError:
888 pass
887 pass
889
888
890 chain = []
889 chain = []
891
890
892 # Alias to prevent attribute lookup in tight loop.
891 # Alias to prevent attribute lookup in tight loop.
893 index = self.index
892 index = self.index
894 generaldelta = self._generaldelta
893 generaldelta = self._generaldelta
895
894
896 iterrev = rev
895 iterrev = rev
897 e = index[iterrev]
896 e = index[iterrev]
898 while iterrev != e[3] and iterrev != stoprev:
897 while iterrev != e[3] and iterrev != stoprev:
899 chain.append(iterrev)
898 chain.append(iterrev)
900 if generaldelta:
899 if generaldelta:
901 iterrev = e[3]
900 iterrev = e[3]
902 else:
901 else:
903 iterrev -= 1
902 iterrev -= 1
904 e = index[iterrev]
903 e = index[iterrev]
905
904
906 if iterrev == stoprev:
905 if iterrev == stoprev:
907 stopped = True
906 stopped = True
908 else:
907 else:
909 chain.append(iterrev)
908 chain.append(iterrev)
910 stopped = False
909 stopped = False
911
910
912 chain.reverse()
911 chain.reverse()
913 return chain, stopped
912 return chain, stopped
914
913
915 def ancestors(self, revs, stoprev=0, inclusive=False):
914 def ancestors(self, revs, stoprev=0, inclusive=False):
916 """Generate the ancestors of 'revs' in reverse revision order.
915 """Generate the ancestors of 'revs' in reverse revision order.
917 Does not generate revs lower than stoprev.
916 Does not generate revs lower than stoprev.
918
917
919 See the documentation for ancestor.lazyancestors for more details."""
918 See the documentation for ancestor.lazyancestors for more details."""
920
919
921 # first, make sure start revisions aren't filtered
920 # first, make sure start revisions aren't filtered
922 revs = list(revs)
921 revs = list(revs)
923 checkrev = self.node
922 checkrev = self.node
924 for r in revs:
923 for r in revs:
925 checkrev(r)
924 checkrev(r)
926 # and we're sure ancestors aren't filtered as well
925 # and we're sure ancestors aren't filtered as well
927
926
928 if rustancestor is not None:
927 if rustancestor is not None:
929 lazyancestors = rustancestor.LazyAncestors
928 lazyancestors = rustancestor.LazyAncestors
930 arg = self.index
929 arg = self.index
931 else:
930 else:
932 lazyancestors = ancestor.lazyancestors
931 lazyancestors = ancestor.lazyancestors
933 arg = self._uncheckedparentrevs
932 arg = self._uncheckedparentrevs
934 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
933 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
935
934
936 def descendants(self, revs):
935 def descendants(self, revs):
937 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
936 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
938
937
939 def findcommonmissing(self, common=None, heads=None):
938 def findcommonmissing(self, common=None, heads=None):
940 """Return a tuple of the ancestors of common and the ancestors of heads
939 """Return a tuple of the ancestors of common and the ancestors of heads
941 that are not ancestors of common. In revset terminology, we return the
940 that are not ancestors of common. In revset terminology, we return the
942 tuple:
941 tuple:
943
942
944 ::common, (::heads) - (::common)
943 ::common, (::heads) - (::common)
945
944
946 The list is sorted by revision number, meaning it is
945 The list is sorted by revision number, meaning it is
947 topologically sorted.
946 topologically sorted.
948
947
949 'heads' and 'common' are both lists of node IDs. If heads is
948 'heads' and 'common' are both lists of node IDs. If heads is
950 not supplied, uses all of the revlog's heads. If common is not
949 not supplied, uses all of the revlog's heads. If common is not
951 supplied, uses nullid."""
950 supplied, uses nullid."""
952 if common is None:
951 if common is None:
953 common = [self.nullid]
952 common = [self.nullid]
954 if heads is None:
953 if heads is None:
955 heads = self.heads()
954 heads = self.heads()
956
955
957 common = [self.rev(n) for n in common]
956 common = [self.rev(n) for n in common]
958 heads = [self.rev(n) for n in heads]
957 heads = [self.rev(n) for n in heads]
959
958
960 # we want the ancestors, but inclusive
959 # we want the ancestors, but inclusive
961 class lazyset(object):
960 class lazyset(object):
962 def __init__(self, lazyvalues):
961 def __init__(self, lazyvalues):
963 self.addedvalues = set()
962 self.addedvalues = set()
964 self.lazyvalues = lazyvalues
963 self.lazyvalues = lazyvalues
965
964
966 def __contains__(self, value):
965 def __contains__(self, value):
967 return value in self.addedvalues or value in self.lazyvalues
966 return value in self.addedvalues or value in self.lazyvalues
968
967
969 def __iter__(self):
968 def __iter__(self):
970 added = self.addedvalues
969 added = self.addedvalues
971 for r in added:
970 for r in added:
972 yield r
971 yield r
973 for r in self.lazyvalues:
972 for r in self.lazyvalues:
974 if r not in added:
973 if r not in added:
975 yield r
974 yield r
976
975
977 def add(self, value):
976 def add(self, value):
978 self.addedvalues.add(value)
977 self.addedvalues.add(value)
979
978
980 def update(self, values):
979 def update(self, values):
981 self.addedvalues.update(values)
980 self.addedvalues.update(values)
982
981
983 has = lazyset(self.ancestors(common))
982 has = lazyset(self.ancestors(common))
984 has.add(nullrev)
983 has.add(nullrev)
985 has.update(common)
984 has.update(common)
986
985
987 # take all ancestors from heads that aren't in has
986 # take all ancestors from heads that aren't in has
988 missing = set()
987 missing = set()
989 visit = collections.deque(r for r in heads if r not in has)
988 visit = collections.deque(r for r in heads if r not in has)
990 while visit:
989 while visit:
991 r = visit.popleft()
990 r = visit.popleft()
992 if r in missing:
991 if r in missing:
993 continue
992 continue
994 else:
993 else:
995 missing.add(r)
994 missing.add(r)
996 for p in self.parentrevs(r):
995 for p in self.parentrevs(r):
997 if p not in has:
996 if p not in has:
998 visit.append(p)
997 visit.append(p)
999 missing = list(missing)
998 missing = list(missing)
1000 missing.sort()
999 missing.sort()
1001 return has, [self.node(miss) for miss in missing]
1000 return has, [self.node(miss) for miss in missing]
1002
1001
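# Illustrative sketch (not part of revlog.py): the walk above collects
# every ancestor of ``heads`` that is not already known through
# ``common``.  The same idea over a plain ``parents`` mapping
# (rev -> tuple of parent revs, -1 meaning "no parent"); ``collections``
# is already imported at the top of this module:

def _sketch_missing(parents, has, heads):
    """Return sorted revs reachable from ``heads`` but absent from ``has``."""
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parents[r]:
            if p != -1 and p not in has:
                visit.append(p)
    return sorted(missing)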
1003 def incrementalmissingrevs(self, common=None):
1002 def incrementalmissingrevs(self, common=None):
1004 """Return an object that can be used to incrementally compute the
1003 """Return an object that can be used to incrementally compute the
1005 revision numbers of the ancestors of arbitrary sets that are not
1004 revision numbers of the ancestors of arbitrary sets that are not
1006 ancestors of common. This is an ancestor.incrementalmissingancestors
1005 ancestors of common. This is an ancestor.incrementalmissingancestors
1007 object.
1006 object.
1008
1007
1009 'common' is a list of revision numbers. If common is not supplied, uses
1008 'common' is a list of revision numbers. If common is not supplied, uses
1010 nullrev.
1009 nullrev.
1011 """
1010 """
1012 if common is None:
1011 if common is None:
1013 common = [nullrev]
1012 common = [nullrev]
1014
1013
1015 if rustancestor is not None:
1014 if rustancestor is not None:
1016 return rustancestor.MissingAncestors(self.index, common)
1015 return rustancestor.MissingAncestors(self.index, common)
1017 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1016 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1018
1017
1019 def findmissingrevs(self, common=None, heads=None):
1018 def findmissingrevs(self, common=None, heads=None):
1020 """Return the revision numbers of the ancestors of heads that
1019 """Return the revision numbers of the ancestors of heads that
1021 are not ancestors of common.
1020 are not ancestors of common.
1022
1021
1023 More specifically, return a list of revision numbers corresponding to
1022 More specifically, return a list of revision numbers corresponding to
1024 nodes N such that every N satisfies the following constraints:
1023 nodes N such that every N satisfies the following constraints:
1025
1024
1026 1. N is an ancestor of some node in 'heads'
1025 1. N is an ancestor of some node in 'heads'
1027 2. N is not an ancestor of any node in 'common'
1026 2. N is not an ancestor of any node in 'common'
1028
1027
1029 The list is sorted by revision number, meaning it is
1028 The list is sorted by revision number, meaning it is
1030 topologically sorted.
1029 topologically sorted.
1031
1030
1032 'heads' and 'common' are both lists of revision numbers. If heads is
1031 'heads' and 'common' are both lists of revision numbers. If heads is
1033 not supplied, uses all of the revlog's heads. If common is not
1032 not supplied, uses all of the revlog's heads. If common is not
1034 supplied, uses nullid."""
1033 supplied, uses nullid."""
1035 if common is None:
1034 if common is None:
1036 common = [nullrev]
1035 common = [nullrev]
1037 if heads is None:
1036 if heads is None:
1038 heads = self.headrevs()
1037 heads = self.headrevs()
1039
1038
1040 inc = self.incrementalmissingrevs(common=common)
1039 inc = self.incrementalmissingrevs(common=common)
1041 return inc.missingancestors(heads)
1040 return inc.missingancestors(heads)
1042
1041
1043 def findmissing(self, common=None, heads=None):
1042 def findmissing(self, common=None, heads=None):
1044 """Return the ancestors of heads that are not ancestors of common.
1043 """Return the ancestors of heads that are not ancestors of common.
1045
1044
1046 More specifically, return a list of nodes N such that every N
1045 More specifically, return a list of nodes N such that every N
1047 satisfies the following constraints:
1046 satisfies the following constraints:
1048
1047
1049 1. N is an ancestor of some node in 'heads'
1048 1. N is an ancestor of some node in 'heads'
1050 2. N is not an ancestor of any node in 'common'
1049 2. N is not an ancestor of any node in 'common'
1051
1050
1052 The list is sorted by revision number, meaning it is
1051 The list is sorted by revision number, meaning it is
1053 topologically sorted.
1052 topologically sorted.
1054
1053
1055 'heads' and 'common' are both lists of node IDs. If heads is
1054 'heads' and 'common' are both lists of node IDs. If heads is
1056 not supplied, uses all of the revlog's heads. If common is not
1055 not supplied, uses all of the revlog's heads. If common is not
1057 supplied, uses nullid."""
1056 supplied, uses nullid."""
1058 if common is None:
1057 if common is None:
1059 common = [self.nullid]
1058 common = [self.nullid]
1060 if heads is None:
1059 if heads is None:
1061 heads = self.heads()
1060 heads = self.heads()
1062
1061
1063 common = [self.rev(n) for n in common]
1062 common = [self.rev(n) for n in common]
1064 heads = [self.rev(n) for n in heads]
1063 heads = [self.rev(n) for n in heads]
1065
1064
1066 inc = self.incrementalmissingrevs(common=common)
1065 inc = self.incrementalmissingrevs(common=common)
1067 return [self.node(r) for r in inc.missingancestors(heads)]
1066 return [self.node(r) for r in inc.missingancestors(heads)]
1068
1067
1069 def nodesbetween(self, roots=None, heads=None):
1068 def nodesbetween(self, roots=None, heads=None):
1070 """Return a topological path from 'roots' to 'heads'.
1069 """Return a topological path from 'roots' to 'heads'.
1071
1070
1072 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1071 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1073 topologically sorted list of all nodes N that satisfy both of
1072 topologically sorted list of all nodes N that satisfy both of
1074 these constraints:
1073 these constraints:
1075
1074
1076 1. N is a descendant of some node in 'roots'
1075 1. N is a descendant of some node in 'roots'
1077 2. N is an ancestor of some node in 'heads'
1076 2. N is an ancestor of some node in 'heads'
1078
1077
1079 Every node is considered to be both a descendant and an ancestor
1078 Every node is considered to be both a descendant and an ancestor
1080 of itself, so every reachable node in 'roots' and 'heads' will be
1079 of itself, so every reachable node in 'roots' and 'heads' will be
1081 included in 'nodes'.
1080 included in 'nodes'.
1082
1081
1083 'outroots' is the list of reachable nodes in 'roots', i.e., the
1082 'outroots' is the list of reachable nodes in 'roots', i.e., the
1084 subset of 'roots' that is returned in 'nodes'. Likewise,
1083 subset of 'roots' that is returned in 'nodes'. Likewise,
1085 'outheads' is the subset of 'heads' that is also in 'nodes'.
1084 'outheads' is the subset of 'heads' that is also in 'nodes'.
1086
1085
1087 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1086 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1088 unspecified, uses nullid as the only root. If 'heads' is
1087 unspecified, uses nullid as the only root. If 'heads' is
1089 unspecified, uses list of all of the revlog's heads."""
1088 unspecified, uses list of all of the revlog's heads."""
1090 nonodes = ([], [], [])
1089 nonodes = ([], [], [])
1091 if roots is not None:
1090 if roots is not None:
1092 roots = list(roots)
1091 roots = list(roots)
1093 if not roots:
1092 if not roots:
1094 return nonodes
1093 return nonodes
1095 lowestrev = min([self.rev(n) for n in roots])
1094 lowestrev = min([self.rev(n) for n in roots])
1096 else:
1095 else:
1097 roots = [self.nullid] # Everybody's a descendant of nullid
1096 roots = [self.nullid] # Everybody's a descendant of nullid
1098 lowestrev = nullrev
1097 lowestrev = nullrev
1099 if (lowestrev == nullrev) and (heads is None):
1098 if (lowestrev == nullrev) and (heads is None):
1100 # We want _all_ the nodes!
1099 # We want _all_ the nodes!
1101 return (
1100 return (
1102 [self.node(r) for r in self],
1101 [self.node(r) for r in self],
1103 [self.nullid],
1102 [self.nullid],
1104 list(self.heads()),
1103 list(self.heads()),
1105 )
1104 )
1106 if heads is None:
1105 if heads is None:
1107 # All nodes are ancestors, so the latest ancestor is the last
1106 # All nodes are ancestors, so the latest ancestor is the last
1108 # node.
1107 # node.
1109 highestrev = len(self) - 1
1108 highestrev = len(self) - 1
1110 # Set ancestors to None to signal that every node is an ancestor.
1109 # Set ancestors to None to signal that every node is an ancestor.
1111 ancestors = None
1110 ancestors = None
1112 # Set heads to an empty dictionary for later discovery of heads
1111 # Set heads to an empty dictionary for later discovery of heads
1113 heads = {}
1112 heads = {}
1114 else:
1113 else:
1115 heads = list(heads)
1114 heads = list(heads)
1116 if not heads:
1115 if not heads:
1117 return nonodes
1116 return nonodes
1118 ancestors = set()
1117 ancestors = set()
1119 # Turn heads into a dictionary so we can remove 'fake' heads.
1118 # Turn heads into a dictionary so we can remove 'fake' heads.
1120 # Also, later we will be using it to filter out the heads we can't
1119 # Also, later we will be using it to filter out the heads we can't
1121 # find from roots.
1120 # find from roots.
1122 heads = dict.fromkeys(heads, False)
1121 heads = dict.fromkeys(heads, False)
1123 # Start at the top and keep marking parents until we're done.
1122 # Start at the top and keep marking parents until we're done.
1124 nodestotag = set(heads)
1123 nodestotag = set(heads)
1125 # Remember where the top was so we can use it as a limit later.
1124 # Remember where the top was so we can use it as a limit later.
1126 highestrev = max([self.rev(n) for n in nodestotag])
1125 highestrev = max([self.rev(n) for n in nodestotag])
1127 while nodestotag:
1126 while nodestotag:
1128 # grab a node to tag
1127 # grab a node to tag
1129 n = nodestotag.pop()
1128 n = nodestotag.pop()
1130 # Never tag nullid
1129 # Never tag nullid
1131 if n == self.nullid:
1130 if n == self.nullid:
1132 continue
1131 continue
1133 # A node's revision number represents its place in a
1132 # A node's revision number represents its place in a
1134 # topologically sorted list of nodes.
1133 # topologically sorted list of nodes.
1135 r = self.rev(n)
1134 r = self.rev(n)
1136 if r >= lowestrev:
1135 if r >= lowestrev:
1137 if n not in ancestors:
1136 if n not in ancestors:
1138 # If we are possibly a descendant of one of the roots
1137 # If we are possibly a descendant of one of the roots
1139 # and we haven't already been marked as an ancestor
1138 # and we haven't already been marked as an ancestor
1140 ancestors.add(n) # Mark as ancestor
1139 ancestors.add(n) # Mark as ancestor
1141 # Add non-nullid parents to list of nodes to tag.
1140 # Add non-nullid parents to list of nodes to tag.
1142 nodestotag.update(
1141 nodestotag.update(
1143 [p for p in self.parents(n) if p != self.nullid]
1142 [p for p in self.parents(n) if p != self.nullid]
1144 )
1143 )
1145 elif n in heads: # We've seen it before, is it a fake head?
1144 elif n in heads: # We've seen it before, is it a fake head?
1146 # So it is, real heads should not be the ancestors of
1145 # So it is, real heads should not be the ancestors of
1147 # any other heads.
1146 # any other heads.
1148 heads.pop(n)
1147 heads.pop(n)
1149 if not ancestors:
1148 if not ancestors:
1150 return nonodes
1149 return nonodes
1151 # Now that we have our set of ancestors, we want to remove any
1150 # Now that we have our set of ancestors, we want to remove any
1152 # roots that are not ancestors.
1151 # roots that are not ancestors.
1153
1152
1154 # If one of the roots was nullid, everything is included anyway.
1153 # If one of the roots was nullid, everything is included anyway.
1155 if lowestrev > nullrev:
1154 if lowestrev > nullrev:
1156 # But, since we weren't, let's recompute the lowest rev to not
1155 # But, since we weren't, let's recompute the lowest rev to not
1157 # include roots that aren't ancestors.
1156 # include roots that aren't ancestors.
1158
1157
1159 # Filter out roots that aren't ancestors of heads
1158 # Filter out roots that aren't ancestors of heads
1160 roots = [root for root in roots if root in ancestors]
1159 roots = [root for root in roots if root in ancestors]
1161 # Recompute the lowest revision
1160 # Recompute the lowest revision
1162 if roots:
1161 if roots:
1163 lowestrev = min([self.rev(root) for root in roots])
1162 lowestrev = min([self.rev(root) for root in roots])
1164 else:
1163 else:
1165 # No more roots? Return empty list
1164 # No more roots? Return empty list
1166 return nonodes
1165 return nonodes
1167 else:
1166 else:
1168 # We are descending from nullid, and don't need to care about
1167 # We are descending from nullid, and don't need to care about
1169 # any other roots.
1168 # any other roots.
1170 lowestrev = nullrev
1169 lowestrev = nullrev
1171 roots = [self.nullid]
1170 roots = [self.nullid]
1172 # Transform our roots list into a set.
1171 # Transform our roots list into a set.
1173 descendants = set(roots)
1172 descendants = set(roots)
1174 # Also, keep the original roots so we can filter out roots that aren't
1173 # Also, keep the original roots so we can filter out roots that aren't
1175 # 'real' roots (i.e. are descended from other roots).
1174 # 'real' roots (i.e. are descended from other roots).
1176 roots = descendants.copy()
1175 roots = descendants.copy()
1177 # Our topologically sorted list of output nodes.
1176 # Our topologically sorted list of output nodes.
1178 orderedout = []
1177 orderedout = []
1179 # Don't start at nullid since we don't want nullid in our output list,
1178 # Don't start at nullid since we don't want nullid in our output list,
1180 # and if nullid shows up in descendants, empty parents will look like
1179 # and if nullid shows up in descendants, empty parents will look like
1181 # they're descendants.
1180 # they're descendants.
1182 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1181 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1183 n = self.node(r)
1182 n = self.node(r)
1184 isdescendant = False
1183 isdescendant = False
1185 if lowestrev == nullrev: # Everybody is a descendant of nullid
1184 if lowestrev == nullrev: # Everybody is a descendant of nullid
1186 isdescendant = True
1185 isdescendant = True
1187 elif n in descendants:
1186 elif n in descendants:
1188 # n is already a descendant
1187 # n is already a descendant
1189 isdescendant = True
1188 isdescendant = True
1190 # This check only needs to be done here because all the roots
1189 # This check only needs to be done here because all the roots
1191 # will start being marked as descendants before the loop.
1190 # will start being marked as descendants before the loop.
1192 if n in roots:
1191 if n in roots:
1193 # If n was a root, check if it's a 'real' root.
1192 # If n was a root, check if it's a 'real' root.
1194 p = tuple(self.parents(n))
1193 p = tuple(self.parents(n))
1195 # If any of its parents are descendants, it's not a root.
1194 # If any of its parents are descendants, it's not a root.
1196 if (p[0] in descendants) or (p[1] in descendants):
1195 if (p[0] in descendants) or (p[1] in descendants):
1197 roots.remove(n)
1196 roots.remove(n)
1198 else:
1197 else:
1199 p = tuple(self.parents(n))
1198 p = tuple(self.parents(n))
1200 # A node is a descendant if either of its parents is a
1199 # A node is a descendant if either of its parents is a
1201 # descendant. (We seeded the descendants set with the roots
1200 # descendant. (We seeded the descendants set with the roots
1202 # up there, remember?)
1201 # up there, remember?)
1203 if (p[0] in descendants) or (p[1] in descendants):
1202 if (p[0] in descendants) or (p[1] in descendants):
1204 descendants.add(n)
1203 descendants.add(n)
1205 isdescendant = True
1204 isdescendant = True
1206 if isdescendant and ((ancestors is None) or (n in ancestors)):
1205 if isdescendant and ((ancestors is None) or (n in ancestors)):
1207 # Only include nodes that are both descendants and ancestors.
1206 # Only include nodes that are both descendants and ancestors.
1208 orderedout.append(n)
1207 orderedout.append(n)
1209 if (ancestors is not None) and (n in heads):
1208 if (ancestors is not None) and (n in heads):
1210 # We're trying to figure out which heads are reachable
1209 # We're trying to figure out which heads are reachable
1211 # from roots.
1210 # from roots.
1212 # Mark this head as having been reached
1211 # Mark this head as having been reached
1213 heads[n] = True
1212 heads[n] = True
1214 elif ancestors is None:
1213 elif ancestors is None:
1215 # Otherwise, we're trying to discover the heads.
1214 # Otherwise, we're trying to discover the heads.
1216 # Assume this is a head because if it isn't, the next step
1215 # Assume this is a head because if it isn't, the next step
1217 # will eventually remove it.
1216 # will eventually remove it.
1218 heads[n] = True
1217 heads[n] = True
1219 # But, obviously its parents aren't.
1218 # But, obviously its parents aren't.
1220 for p in self.parents(n):
1219 for p in self.parents(n):
1221 heads.pop(p, None)
1220 heads.pop(p, None)
1222 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1221 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1223 roots = list(roots)
1222 roots = list(roots)
1224 assert orderedout
1223 assert orderedout
1225 assert roots
1224 assert roots
1226 assert heads
1225 assert heads
1227 return (orderedout, roots, heads)
1226 return (orderedout, roots, heads)
1228
1227
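# Illustrative sketch (not part of revlog.py): on a toy linear history
# 0 <- 1 <- 2 <- 3, asking for the nodes between root 1 and head 3
# should give the span [1, 2, 3].  A minimal rev-number version of the
# "descendant of a root and ancestor of a head" filter used above:

def _sketch_revsbetween(parents, roots, heads):
    """parents: rev -> tuple of parent revs, -1 meaning no parent."""
    # mark descendants of the roots, walking in increasing rev order
    descendants = set(roots)
    for r in range(min(roots), max(heads) + 1):
        if any(p in descendants for p in parents[r] if p != -1):
            descendants.add(r)
    # mark ancestors of the heads, walking in decreasing rev order
    ancestors = set(heads)
    for r in range(max(heads), min(roots) - 1, -1):
        if r in ancestors:
            ancestors.update(p for p in parents[r] if p != -1)
    return sorted(descendants & ancestors)

# _sketch_revsbetween({0: (-1,), 1: (0,), 2: (1,), 3: (2,)},
#                     roots=[1], heads=[3]) == [1, 2, 3]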
1229 def headrevs(self, revs=None):
1228 def headrevs(self, revs=None):
1230 if revs is None:
1229 if revs is None:
1231 try:
1230 try:
1232 return self.index.headrevs()
1231 return self.index.headrevs()
1233 except AttributeError:
1232 except AttributeError:
1234 return self._headrevs()
1233 return self._headrevs()
1235 if rustdagop is not None:
1234 if rustdagop is not None:
1236 return rustdagop.headrevs(self.index, revs)
1235 return rustdagop.headrevs(self.index, revs)
1237 return dagop.headrevs(revs, self._uncheckedparentrevs)
1236 return dagop.headrevs(revs, self._uncheckedparentrevs)
1238
1237
1239 def computephases(self, roots):
1238 def computephases(self, roots):
1240 return self.index.computephasesmapsets(roots)
1239 return self.index.computephasesmapsets(roots)
1241
1240
1242 def _headrevs(self):
1241 def _headrevs(self):
1243 count = len(self)
1242 count = len(self)
1244 if not count:
1243 if not count:
1245 return [nullrev]
1244 return [nullrev]
1246 # we won't iterate over filtered revs, so nobody is a head at start
1245 # we won't iterate over filtered revs, so nobody is a head at start
1247 ishead = [0] * (count + 1)
1246 ishead = [0] * (count + 1)
1248 index = self.index
1247 index = self.index
1249 for r in self:
1248 for r in self:
1250 ishead[r] = 1 # I may be a head
1249 ishead[r] = 1 # I may be a head
1251 e = index[r]
1250 e = index[r]
1252 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1251 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1253 return [r for r, val in enumerate(ishead) if val]
1252 return [r for r, val in enumerate(ishead) if val]
1254
1253
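# Illustrative sketch (not part of revlog.py): the pure-Python fallback
# above treats every revision as a head until something names it as a
# parent.  The same trick against a plain parent mapping:

def _sketch_headrevs(parents, count):
    """parents: rev -> (p1, p2), -1 meaning no parent."""
    ishead = [True] * count
    for r in range(count):
        for p in parents[r]:
            if p != -1:
                ishead[p] = False
    return [r for r in range(count) if ishead[r]]

# Two branches off revision 0:
# _sketch_headrevs({0: (-1, -1), 1: (0, -1), 2: (0, -1)}, 3) == [1, 2]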
1255 def heads(self, start=None, stop=None):
1254 def heads(self, start=None, stop=None):
1256 """return the list of all nodes that have no children
1255 """return the list of all nodes that have no children
1257
1256
1258 if start is specified, only heads that are descendants of
1257 if start is specified, only heads that are descendants of
1259 start will be returned
1258 start will be returned
1260 if stop is specified, it will consider all the revs from stop
1259 if stop is specified, it will consider all the revs from stop
1261 as if they had no children
1260 as if they had no children
1262 """
1261 """
1263 if start is None and stop is None:
1262 if start is None and stop is None:
1264 if not len(self):
1263 if not len(self):
1265 return [self.nullid]
1264 return [self.nullid]
1266 return [self.node(r) for r in self.headrevs()]
1265 return [self.node(r) for r in self.headrevs()]
1267
1266
1268 if start is None:
1267 if start is None:
1269 start = nullrev
1268 start = nullrev
1270 else:
1269 else:
1271 start = self.rev(start)
1270 start = self.rev(start)
1272
1271
1273 stoprevs = {self.rev(n) for n in stop or []}
1272 stoprevs = {self.rev(n) for n in stop or []}
1274
1273
1275 revs = dagop.headrevssubset(
1274 revs = dagop.headrevssubset(
1276 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1275 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1277 )
1276 )
1278
1277
1279 return [self.node(rev) for rev in revs]
1278 return [self.node(rev) for rev in revs]
1280
1279
1281 def children(self, node):
1280 def children(self, node):
1282 """find the children of a given node"""
1281 """find the children of a given node"""
1283 c = []
1282 c = []
1284 p = self.rev(node)
1283 p = self.rev(node)
1285 for r in self.revs(start=p + 1):
1284 for r in self.revs(start=p + 1):
1286 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1285 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1287 if prevs:
1286 if prevs:
1288 for pr in prevs:
1287 for pr in prevs:
1289 if pr == p:
1288 if pr == p:
1290 c.append(self.node(r))
1289 c.append(self.node(r))
1291 elif p == nullrev:
1290 elif p == nullrev:
1292 c.append(self.node(r))
1291 c.append(self.node(r))
1293 return c
1292 return c
1294
1293
1295 def commonancestorsheads(self, a, b):
1294 def commonancestorsheads(self, a, b):
1296 """calculate all the heads of the common ancestors of nodes a and b"""
1295 """calculate all the heads of the common ancestors of nodes a and b"""
1297 a, b = self.rev(a), self.rev(b)
1296 a, b = self.rev(a), self.rev(b)
1298 ancs = self._commonancestorsheads(a, b)
1297 ancs = self._commonancestorsheads(a, b)
1299 return pycompat.maplist(self.node, ancs)
1298 return pycompat.maplist(self.node, ancs)
1300
1299
1301 def _commonancestorsheads(self, *revs):
1300 def _commonancestorsheads(self, *revs):
1302 """calculate all the heads of the common ancestors of revs"""
1301 """calculate all the heads of the common ancestors of revs"""
1303 try:
1302 try:
1304 ancs = self.index.commonancestorsheads(*revs)
1303 ancs = self.index.commonancestorsheads(*revs)
1305 except (AttributeError, OverflowError): # C implementation failed
1304 except (AttributeError, OverflowError): # C implementation failed
1306 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1305 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1307 return ancs
1306 return ancs
1308
1307
1309 def isancestor(self, a, b):
1308 def isancestor(self, a, b):
1310 """return True if node a is an ancestor of node b
1309 """return True if node a is an ancestor of node b
1311
1310
1312 A revision is considered an ancestor of itself."""
1311 A revision is considered an ancestor of itself."""
1313 a, b = self.rev(a), self.rev(b)
1312 a, b = self.rev(a), self.rev(b)
1314 return self.isancestorrev(a, b)
1313 return self.isancestorrev(a, b)
1315
1314
1316 def isancestorrev(self, a, b):
1315 def isancestorrev(self, a, b):
1317 """return True if revision a is an ancestor of revision b
1316 """return True if revision a is an ancestor of revision b
1318
1317
1319 A revision is considered an ancestor of itself.
1318 A revision is considered an ancestor of itself.
1320
1319
1321 The implementation of this is trivial but the use of
1320 The implementation of this is trivial but the use of
1322 reachableroots is not."""
1321 reachableroots is not."""
1323 if a == nullrev:
1322 if a == nullrev:
1324 return True
1323 return True
1325 elif a == b:
1324 elif a == b:
1326 return True
1325 return True
1327 elif a > b:
1326 elif a > b:
1328 return False
1327 return False
1329 return bool(self.reachableroots(a, [b], [a], includepath=False))
1328 return bool(self.reachableroots(a, [b], [a], includepath=False))
1330
1329
1331 def reachableroots(self, minroot, heads, roots, includepath=False):
1330 def reachableroots(self, minroot, heads, roots, includepath=False):
1332 """return (heads(::(<roots> and <roots>::<heads>)))
1331 """return (heads(::(<roots> and <roots>::<heads>)))
1333
1332
1334 If includepath is True, return (<roots>::<heads>)."""
1333 If includepath is True, return (<roots>::<heads>)."""
1335 try:
1334 try:
1336 return self.index.reachableroots2(
1335 return self.index.reachableroots2(
1337 minroot, heads, roots, includepath
1336 minroot, heads, roots, includepath
1338 )
1337 )
1339 except AttributeError:
1338 except AttributeError:
1340 return dagop._reachablerootspure(
1339 return dagop._reachablerootspure(
1341 self.parentrevs, minroot, roots, heads, includepath
1340 self.parentrevs, minroot, roots, heads, includepath
1342 )
1341 )
1343
1342
1344 def ancestor(self, a, b):
1343 def ancestor(self, a, b):
1345 """calculate the "best" common ancestor of nodes a and b"""
1344 """calculate the "best" common ancestor of nodes a and b"""
1346
1345
1347 a, b = self.rev(a), self.rev(b)
1346 a, b = self.rev(a), self.rev(b)
1348 try:
1347 try:
1349 ancs = self.index.ancestors(a, b)
1348 ancs = self.index.ancestors(a, b)
1350 except (AttributeError, OverflowError):
1349 except (AttributeError, OverflowError):
1351 ancs = ancestor.ancestors(self.parentrevs, a, b)
1350 ancs = ancestor.ancestors(self.parentrevs, a, b)
1352 if ancs:
1351 if ancs:
1353 # choose a consistent winner when there's a tie
1352 # choose a consistent winner when there's a tie
1354 return min(map(self.node, ancs))
1353 return min(map(self.node, ancs))
1355 return self.nullid
1354 return self.nullid
1356
1355
1357 def _match(self, id):
1356 def _match(self, id):
1358 if isinstance(id, int):
1357 if isinstance(id, int):
1359 # rev
1358 # rev
1360 return self.node(id)
1359 return self.node(id)
1361 if len(id) == self.nodeconstants.nodelen:
1360 if len(id) == self.nodeconstants.nodelen:
1362 # possibly a binary node
1361 # possibly a binary node
1363 # odds of a binary node being all hex in ASCII are 1 in 10**25
1362 # odds of a binary node being all hex in ASCII are 1 in 10**25
1364 try:
1363 try:
1365 node = id
1364 node = id
1366 self.rev(node) # quick search the index
1365 self.rev(node) # quick search the index
1367 return node
1366 return node
1368 except error.LookupError:
1367 except error.LookupError:
1369 pass # may be partial hex id
1368 pass # may be partial hex id
1370 try:
1369 try:
1371 # str(rev)
1370 # str(rev)
1372 rev = int(id)
1371 rev = int(id)
1373 if b"%d" % rev != id:
1372 if b"%d" % rev != id:
1374 raise ValueError
1373 raise ValueError
1375 if rev < 0:
1374 if rev < 0:
1376 rev = len(self) + rev
1375 rev = len(self) + rev
1377 if rev < 0 or rev >= len(self):
1376 if rev < 0 or rev >= len(self):
1378 raise ValueError
1377 raise ValueError
1379 return self.node(rev)
1378 return self.node(rev)
1380 except (ValueError, OverflowError):
1379 except (ValueError, OverflowError):
1381 pass
1380 pass
1382 if len(id) == 2 * self.nodeconstants.nodelen:
1381 if len(id) == 2 * self.nodeconstants.nodelen:
1383 try:
1382 try:
1384 # a full hex nodeid?
1383 # a full hex nodeid?
1385 node = bin(id)
1384 node = bin(id)
1386 self.rev(node)
1385 self.rev(node)
1387 return node
1386 return node
1388 except (TypeError, error.LookupError):
1387 except (TypeError, error.LookupError):
1389 pass
1388 pass
1390
1389
1391 def _partialmatch(self, id):
1390 def _partialmatch(self, id):
1392 # we don't care about wdirfilenodeids as they should always be full hashes
1391 # we don't care about wdirfilenodeids as they should always be full hashes
1393 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1392 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1394 try:
1393 try:
1395 partial = self.index.partialmatch(id)
1394 partial = self.index.partialmatch(id)
1396 if partial and self.hasnode(partial):
1395 if partial and self.hasnode(partial):
1397 if maybewdir:
1396 if maybewdir:
1398 # single 'ff...' match in radix tree, ambiguous with wdir
1397 # single 'ff...' match in radix tree, ambiguous with wdir
1399 raise error.RevlogError
1398 raise error.RevlogError
1400 return partial
1399 return partial
1401 if maybewdir:
1400 if maybewdir:
1402 # no 'ff...' match in radix tree, wdir identified
1401 # no 'ff...' match in radix tree, wdir identified
1403 raise error.WdirUnsupported
1402 raise error.WdirUnsupported
1404 return None
1403 return None
1405 except error.RevlogError:
1404 except error.RevlogError:
1406 # parsers.c radix tree lookup gave multiple matches
1405 # parsers.c radix tree lookup gave multiple matches
1407 # fast path: for unfiltered changelog, radix tree is accurate
1406 # fast path: for unfiltered changelog, radix tree is accurate
1408 if not getattr(self, 'filteredrevs', None):
1407 if not getattr(self, 'filteredrevs', None):
1409 raise error.AmbiguousPrefixLookupError(
1408 raise error.AmbiguousPrefixLookupError(
1410 id, self.display_id, _(b'ambiguous identifier')
1409 id, self.display_id, _(b'ambiguous identifier')
1411 )
1410 )
1412 # fall through to slow path that filters hidden revisions
1411 # fall through to slow path that filters hidden revisions
1413 except (AttributeError, ValueError):
1412 except (AttributeError, ValueError):
1414 # we are pure python, or key was too short to search radix tree
1413 # we are pure python, or key was too short to search radix tree
1415 pass
1414 pass
1416
1415
1417 if id in self._pcache:
1416 if id in self._pcache:
1418 return self._pcache[id]
1417 return self._pcache[id]
1419
1418
1420 if len(id) <= 40:
1419 if len(id) <= 40:
1421 try:
1420 try:
1422 # hex(node)[:...]
1421 # hex(node)[:...]
1423 l = len(id) // 2 # grab an even number of digits
1422 l = len(id) // 2 # grab an even number of digits
1424 prefix = bin(id[: l * 2])
1423 prefix = bin(id[: l * 2])
1425 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1424 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1426 nl = [
1425 nl = [
1427 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1426 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1428 ]
1427 ]
1429 if self.nodeconstants.nullhex.startswith(id):
1428 if self.nodeconstants.nullhex.startswith(id):
1430 nl.append(self.nullid)
1429 nl.append(self.nullid)
1431 if len(nl) > 0:
1430 if len(nl) > 0:
1432 if len(nl) == 1 and not maybewdir:
1431 if len(nl) == 1 and not maybewdir:
1433 self._pcache[id] = nl[0]
1432 self._pcache[id] = nl[0]
1434 return nl[0]
1433 return nl[0]
1435 raise error.AmbiguousPrefixLookupError(
1434 raise error.AmbiguousPrefixLookupError(
1436 id, self.display_id, _(b'ambiguous identifier')
1435 id, self.display_id, _(b'ambiguous identifier')
1437 )
1436 )
1438 if maybewdir:
1437 if maybewdir:
1439 raise error.WdirUnsupported
1438 raise error.WdirUnsupported
1440 return None
1439 return None
1441 except TypeError:
1440 except TypeError:
1442 pass
1441 pass
1443
1442
1444 def lookup(self, id):
1443 def lookup(self, id):
1445 """locate a node based on:
1444 """locate a node based on:
1446 - revision number or str(revision number)
1445 - revision number or str(revision number)
1447 - nodeid or subset of hex nodeid
1446 - nodeid or subset of hex nodeid
1448 """
1447 """
1449 n = self._match(id)
1448 n = self._match(id)
1450 if n is not None:
1449 if n is not None:
1451 return n
1450 return n
1452 n = self._partialmatch(id)
1451 n = self._partialmatch(id)
1453 if n:
1452 if n:
1454 return n
1453 return n
1455
1454
1456 raise error.LookupError(id, self.display_id, _(b'no match found'))
1455 raise error.LookupError(id, self.display_id, _(b'no match found'))
1457
1456
1458 def shortest(self, node, minlength=1):
1457 def shortest(self, node, minlength=1):
1459 """Find the shortest unambiguous prefix that matches node."""
1458 """Find the shortest unambiguous prefix that matches node."""
1460
1459
1461 def isvalid(prefix):
1460 def isvalid(prefix):
1462 try:
1461 try:
1463 matchednode = self._partialmatch(prefix)
1462 matchednode = self._partialmatch(prefix)
1464 except error.AmbiguousPrefixLookupError:
1463 except error.AmbiguousPrefixLookupError:
1465 return False
1464 return False
1466 except error.WdirUnsupported:
1465 except error.WdirUnsupported:
1467 # single 'ff...' match
1466 # single 'ff...' match
1468 return True
1467 return True
1469 if matchednode is None:
1468 if matchednode is None:
1470 raise error.LookupError(node, self.display_id, _(b'no node'))
1469 raise error.LookupError(node, self.display_id, _(b'no node'))
1471 return True
1470 return True
1472
1471
1473 def maybewdir(prefix):
1472 def maybewdir(prefix):
1474 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1473 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1475
1474
1476 hexnode = hex(node)
1475 hexnode = hex(node)
1477
1476
1478 def disambiguate(hexnode, minlength):
1477 def disambiguate(hexnode, minlength):
1479 """Disambiguate against wdirid."""
1478 """Disambiguate against wdirid."""
1480 for length in range(minlength, len(hexnode) + 1):
1479 for length in range(minlength, len(hexnode) + 1):
1481 prefix = hexnode[:length]
1480 prefix = hexnode[:length]
1482 if not maybewdir(prefix):
1481 if not maybewdir(prefix):
1483 return prefix
1482 return prefix
1484
1483
1485 if not getattr(self, 'filteredrevs', None):
1484 if not getattr(self, 'filteredrevs', None):
1486 try:
1485 try:
1487 length = max(self.index.shortest(node), minlength)
1486 length = max(self.index.shortest(node), minlength)
1488 return disambiguate(hexnode, length)
1487 return disambiguate(hexnode, length)
1489 except error.RevlogError:
1488 except error.RevlogError:
1490 if node != self.nodeconstants.wdirid:
1489 if node != self.nodeconstants.wdirid:
1491 raise error.LookupError(
1490 raise error.LookupError(
1492 node, self.display_id, _(b'no node')
1491 node, self.display_id, _(b'no node')
1493 )
1492 )
1494 except AttributeError:
1493 except AttributeError:
1495 # Fall through to pure code
1494 # Fall through to pure code
1496 pass
1495 pass
1497
1496
1498 if node == self.nodeconstants.wdirid:
1497 if node == self.nodeconstants.wdirid:
1499 for length in range(minlength, len(hexnode) + 1):
1498 for length in range(minlength, len(hexnode) + 1):
1500 prefix = hexnode[:length]
1499 prefix = hexnode[:length]
1501 if isvalid(prefix):
1500 if isvalid(prefix):
1502 return prefix
1501 return prefix
1503
1502
1504 for length in range(minlength, len(hexnode) + 1):
1503 for length in range(minlength, len(hexnode) + 1):
1505 prefix = hexnode[:length]
1504 prefix = hexnode[:length]
1506 if isvalid(prefix):
1505 if isvalid(prefix):
1507 return disambiguate(hexnode, length)
1506 return disambiguate(hexnode, length)
1508
1507
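# Illustrative sketch (not part of revlog.py): without the C index
# helper, and ignoring the wdir special case handled above, "shortest
# unambiguous prefix" simply means growing the prefix until exactly
# one known hex node starts with it:

def _sketch_shortest(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(1 for n in hexnodes if n.startswith(prefix)) == 1:
            return prefix
    return hexnode

# _sketch_shortest({'abcd12', 'abff34', '129af0'}, 'abcd12') == 'abc'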
1509 def cmp(self, node, text):
1508 def cmp(self, node, text):
1510 """compare text with a given file revision
1509 """compare text with a given file revision
1511
1510
1512 returns True if text is different from what is stored.
1511 returns True if text is different from what is stored.
1513 """
1512 """
1514 p1, p2 = self.parents(node)
1513 p1, p2 = self.parents(node)
1515 return storageutil.hashrevisionsha1(text, p1, p2) != node
1514 return storageutil.hashrevisionsha1(text, p1, p2) != node
1516
1515
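# Illustrative sketch (not part of revlog.py): ``cmp`` can compare by
# hash because a sha1 revlog node is the digest of the revision text
# salted with its two parent nodes, smaller parent first -- roughly
# what storageutil.hashrevisionsha1 computes:

import hashlib

def _sketch_hashrevision(text, p1, p2):
    a, b = sorted([p1, p2])
    s = hashlib.sha1()
    s.update(a)
    s.update(b)
    s.update(text)
    return s.digest()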
1517 def _cachesegment(self, offset, data):
1516 def _cachesegment(self, offset, data):
1518 """Add a segment to the revlog cache.
1517 """Add a segment to the revlog cache.
1519
1518
1520 Accepts an absolute offset and the data that is at that location.
1519 Accepts an absolute offset and the data that is at that location.
1521 """
1520 """
1522 o, d = self._chunkcache
1521 o, d = self._chunkcache
1523 # try to add to existing cache
1522 # try to add to existing cache
1524 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1523 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1525 self._chunkcache = o, d + data
1524 self._chunkcache = o, d + data
1526 else:
1525 else:
1527 self._chunkcache = offset, data
1526 self._chunkcache = offset, data
1528
1527
1529 def _readsegment(self, offset, length, df=None):
1528 def _readsegment(self, offset, length, df=None):
1530 """Load a segment of raw data from the revlog.
1529 """Load a segment of raw data from the revlog.
1531
1530
1532 Accepts an absolute offset, length to read, and an optional existing
1531 Accepts an absolute offset, length to read, and an optional existing
1533 file handle to read from.
1532 file handle to read from.
1534
1533
1535 If an existing file handle is passed, it will be seeked and the
1534 If an existing file handle is passed, it will be seeked and the
1536 original seek position will NOT be restored.
1535 original seek position will NOT be restored.
1537
1536
1538 Returns a str or buffer of raw byte data.
1537 Returns a str or buffer of raw byte data.
1539
1538
1540 Raises if the requested number of bytes could not be read.
1539 Raises if the requested number of bytes could not be read.
1541 """
1540 """
1542 # Cache data both forward and backward around the requested
1541 # Cache data both forward and backward around the requested
1543 # data, in a fixed size window. This helps speed up operations
1542 # data, in a fixed size window. This helps speed up operations
1544 # involving reading the revlog backwards.
1543 # involving reading the revlog backwards.
1545 cachesize = self._chunkcachesize
1544 cachesize = self._chunkcachesize
1546 realoffset = offset & ~(cachesize - 1)
1545 realoffset = offset & ~(cachesize - 1)
1547 reallength = (
1546 reallength = (
1548 (offset + length + cachesize) & ~(cachesize - 1)
1547 (offset + length + cachesize) & ~(cachesize - 1)
1549 ) - realoffset
1548 ) - realoffset
1550 with self._datareadfp(df) as df:
1549 with self._datareadfp(df) as df:
1551 df.seek(realoffset)
1550 df.seek(realoffset)
1552 d = df.read(reallength)
1551 d = df.read(reallength)
1553
1552
1554 self._cachesegment(realoffset, d)
1553 self._cachesegment(realoffset, d)
1555 if offset != realoffset or reallength != length:
1554 if offset != realoffset or reallength != length:
1556 startoffset = offset - realoffset
1555 startoffset = offset - realoffset
1557 if len(d) - startoffset < length:
1556 if len(d) - startoffset < length:
1558 raise error.RevlogError(
1557 raise error.RevlogError(
1559 _(
1558 _(
1560 b'partial read of revlog %s; expected %d bytes from '
1559 b'partial read of revlog %s; expected %d bytes from '
1561 b'offset %d, got %d'
1560 b'offset %d, got %d'
1562 )
1561 )
1563 % (
1562 % (
1564 self._indexfile if self._inline else self._datafile,
1563 self._indexfile if self._inline else self._datafile,
1565 length,
1564 length,
1566 offset,
1565 offset,
1567 len(d) - startoffset,
1566 len(d) - startoffset,
1568 )
1567 )
1569 )
1568 )
1570
1569
1571 return util.buffer(d, startoffset, length)
1570 return util.buffer(d, startoffset, length)
1572
1571
1573 if len(d) < length:
1572 if len(d) < length:
1574 raise error.RevlogError(
1573 raise error.RevlogError(
1575 _(
1574 _(
1576 b'partial read of revlog %s; expected %d bytes from offset '
1575 b'partial read of revlog %s; expected %d bytes from offset '
1577 b'%d, got %d'
1576 b'%d, got %d'
1578 )
1577 )
1579 % (
1578 % (
1580 self._indexfile if self._inline else self._datafile,
1579 self._indexfile if self._inline else self._datafile,
1581 length,
1580 length,
1582 offset,
1581 offset,
1583 len(d),
1582 len(d),
1584 )
1583 )
1585 )
1584 )
1586
1585
1587 return d
1586 return d
1588
1587
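# Illustrative sketch (not part of revlog.py): with a power-of-two
# chunk cache size, the masking above simply aligns the read window.
# Worked through with a hypothetical 65536-byte cache size:

_sketch_cachesize = 65536
_sketch_offset, _sketch_length = 70000, 100
_sketch_realoffset = _sketch_offset & ~(_sketch_cachesize - 1)
# -> 65536: the request is rounded down to the window start
_sketch_reallength = (
    (_sketch_offset + _sketch_length + _sketch_cachesize)
    & ~(_sketch_cachesize - 1)
) - _sketch_realoffset
# -> 65536: the window [65536, 131072) covers the requested bytes
# [70000, 70100) plus slack for nearby reads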
1589 def _getsegment(self, offset, length, df=None):
1588 def _getsegment(self, offset, length, df=None):
1590 """Obtain a segment of raw data from the revlog.
1589 """Obtain a segment of raw data from the revlog.
1591
1590
1592 Accepts an absolute offset, length of bytes to obtain, and an
1591 Accepts an absolute offset, length of bytes to obtain, and an
1593 optional file handle to the already-opened revlog. If the file
1592 optional file handle to the already-opened revlog. If the file
1594 handle is used, its original seek position will not be preserved.
1593 handle is used, its original seek position will not be preserved.
1595
1594
1596 Requests for data may be returned from a cache.
1595 Requests for data may be returned from a cache.
1597
1596
1598 Returns a str or a buffer instance of raw byte data.
1597 Returns a str or a buffer instance of raw byte data.
1599 """
1598 """
1600 o, d = self._chunkcache
1599 o, d = self._chunkcache
1601 l = len(d)
1600 l = len(d)
1602
1601
1603 # is it in the cache?
1602 # is it in the cache?
1604 cachestart = offset - o
1603 cachestart = offset - o
1605 cacheend = cachestart + length
1604 cacheend = cachestart + length
1606 if cachestart >= 0 and cacheend <= l:
1605 if cachestart >= 0 and cacheend <= l:
1607 if cachestart == 0 and cacheend == l:
1606 if cachestart == 0 and cacheend == l:
1608 return d # avoid a copy
1607 return d # avoid a copy
1609 return util.buffer(d, cachestart, cacheend - cachestart)
1608 return util.buffer(d, cachestart, cacheend - cachestart)
1610
1609
1611 return self._readsegment(offset, length, df=df)
1610 return self._readsegment(offset, length, df=df)
1612
1611
1613 def _getsegmentforrevs(self, startrev, endrev, df=None):
1612 def _getsegmentforrevs(self, startrev, endrev, df=None):
1614 """Obtain a segment of raw data corresponding to a range of revisions.
1613 """Obtain a segment of raw data corresponding to a range of revisions.
1615
1614
1616 Accepts the start and end revisions and an optional already-open
1615 Accepts the start and end revisions and an optional already-open
1617 file handle to be used for reading. If the file handle is read, its
1616 file handle to be used for reading. If the file handle is read, its
1618 seek position will not be preserved.
1617 seek position will not be preserved.
1619
1618
1620 Requests for data may be satisfied by a cache.
1619 Requests for data may be satisfied by a cache.
1621
1620
1622 Returns a 2-tuple of (offset, data) for the requested range of
1621 Returns a 2-tuple of (offset, data) for the requested range of
1623 revisions. Offset is the integer offset from the beginning of the
1622 revisions. Offset is the integer offset from the beginning of the
1624 revlog and data is a str or buffer of the raw byte data.
1623 revlog and data is a str or buffer of the raw byte data.
1625
1624
1626 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1625 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1627 to determine where each revision's data begins and ends.
1626 to determine where each revision's data begins and ends.
1628 """
1627 """
1629 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1628 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1630 # (functions are expensive).
1629 # (functions are expensive).
1631 index = self.index
1630 index = self.index
1632 istart = index[startrev]
1631 istart = index[startrev]
1633 start = int(istart[0] >> 16)
1632 start = int(istart[0] >> 16)
1634 if startrev == endrev:
1633 if startrev == endrev:
1635 end = start + istart[1]
1634 end = start + istart[1]
1636 else:
1635 else:
1637 iend = index[endrev]
1636 iend = index[endrev]
1638 end = int(iend[0] >> 16) + iend[1]
1637 end = int(iend[0] >> 16) + iend[1]
1639
1638
1640 if self._inline:
1639 if self._inline:
1641 start += (startrev + 1) * self.index.entry_size
1640 start += (startrev + 1) * self.index.entry_size
1642 end += (endrev + 1) * self.index.entry_size
1641 end += (endrev + 1) * self.index.entry_size
1643 length = end - start
1642 length = end - start
1644
1643
1645 return start, self._getsegment(start, length, df=df)
1644 return start, self._getsegment(start, length, df=df)
1646
1645
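# Illustrative sketch (not part of revlog.py): in an inline revlog the
# index entries and the revision data share one file, so the logical
# data offset from the index has to be shifted by the index entries
# written before that revision's data.  With a hypothetical 64-byte
# entry size:

_sketch_entry_size = 64
_sketch_logical_start = 500  # offset counting data bytes only
_sketch_rev = 3
_sketch_physical_start = (
    _sketch_logical_start + (_sketch_rev + 1) * _sketch_entry_size
)
# -> 500 + 4 * 64 == 756, matching the ``(rev + 1) * entry_size``
# adjustment applied above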
1647 def _chunk(self, rev, df=None):
1646 def _chunk(self, rev, df=None):
1648 """Obtain a single decompressed chunk for a revision.
1647 """Obtain a single decompressed chunk for a revision.
1649
1648
1650 Accepts an integer revision and an optional already-open file handle
1649 Accepts an integer revision and an optional already-open file handle
1651 to be used for reading. If used, the seek position of the file will not
1650 to be used for reading. If used, the seek position of the file will not
1652 be preserved.
1651 be preserved.
1653
1652
1654 Returns a str holding uncompressed data for the requested revision.
1653 Returns a str holding uncompressed data for the requested revision.
1655 """
1654 """
1656 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1655 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1657
1656
1658 def _chunks(self, revs, df=None, targetsize=None):
1657 def _chunks(self, revs, df=None, targetsize=None):
1659 """Obtain decompressed chunks for the specified revisions.
1658 """Obtain decompressed chunks for the specified revisions.
1660
1659
1661 Accepts an iterable of numeric revisions that are assumed to be in
1660 Accepts an iterable of numeric revisions that are assumed to be in
1662 ascending order. Also accepts an optional already-open file handle
1661 ascending order. Also accepts an optional already-open file handle
1663 to be used for reading. If used, the seek position of the file will
1662 to be used for reading. If used, the seek position of the file will
1664 not be preserved.
1663 not be preserved.
1665
1664
1666 This function is similar to calling ``self._chunk()`` multiple times,
1665 This function is similar to calling ``self._chunk()`` multiple times,
1667 but is faster.
1666 but is faster.
1668
1667
1669 Returns a list with decompressed data for each requested revision.
1668 Returns a list with decompressed data for each requested revision.
1670 """
1669 """
1671 if not revs:
1670 if not revs:
1672 return []
1671 return []
1673 start = self.start
1672 start = self.start
1674 length = self.length
1673 length = self.length
1675 inline = self._inline
1674 inline = self._inline
1676 iosize = self.index.entry_size
1675 iosize = self.index.entry_size
1677 buffer = util.buffer
1676 buffer = util.buffer
1678
1677
1679 l = []
1678 l = []
1680 ladd = l.append
1679 ladd = l.append
1681
1680
1682 if not self._withsparseread:
1681 if not self._withsparseread:
1683 slicedchunks = (revs,)
1682 slicedchunks = (revs,)
1684 else:
1683 else:
1685 slicedchunks = deltautil.slicechunk(
1684 slicedchunks = deltautil.slicechunk(
1686 self, revs, targetsize=targetsize
1685 self, revs, targetsize=targetsize
1687 )
1686 )
1688
1687
1689 for revschunk in slicedchunks:
1688 for revschunk in slicedchunks:
1690 firstrev = revschunk[0]
1689 firstrev = revschunk[0]
1691 # Skip trailing revisions with empty diff
1690 # Skip trailing revisions with empty diff
1692 for lastrev in revschunk[::-1]:
1691 for lastrev in revschunk[::-1]:
1693 if length(lastrev) != 0:
1692 if length(lastrev) != 0:
1694 break
1693 break
1695
1694
1696 try:
1695 try:
1697 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1696 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1698 except OverflowError:
1697 except OverflowError:
1699 # issue4215 - we can't cache a run of chunks greater than
1698 # issue4215 - we can't cache a run of chunks greater than
1700 # 2G on Windows
1699 # 2G on Windows
1701 return [self._chunk(rev, df=df) for rev in revschunk]
1700 return [self._chunk(rev, df=df) for rev in revschunk]
1702
1701
1703 decomp = self.decompress
1702 decomp = self.decompress
1704 for rev in revschunk:
1703 for rev in revschunk:
1705 chunkstart = start(rev)
1704 chunkstart = start(rev)
1706 if inline:
1705 if inline:
1707 chunkstart += (rev + 1) * iosize
1706 chunkstart += (rev + 1) * iosize
1708 chunklength = length(rev)
1707 chunklength = length(rev)
1709 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1708 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1710
1709
1711 return l
1710 return l
1712
1711
1713 def _chunkclear(self):
1712 def _chunkclear(self):
1714 """Clear the raw chunk cache."""
1713 """Clear the raw chunk cache."""
1715 self._chunkcache = (0, b'')
1714 self._chunkcache = (0, b'')
1716
1715
1717 def deltaparent(self, rev):
1716 def deltaparent(self, rev):
1718 """return deltaparent of the given revision"""
1717 """return deltaparent of the given revision"""
1719 base = self.index[rev][3]
1718 base = self.index[rev][3]
1720 if base == rev:
1719 if base == rev:
1721 return nullrev
1720 return nullrev
1722 elif self._generaldelta:
1721 elif self._generaldelta:
1723 return base
1722 return base
1724 else:
1723 else:
1725 return rev - 1
1724 return rev - 1
1726
1725
1727 def issnapshot(self, rev):
1726 def issnapshot(self, rev):
1728 """tells whether rev is a snapshot"""
1727 """tells whether rev is a snapshot"""
1729 if not self._sparserevlog:
1728 if not self._sparserevlog:
1730 return self.deltaparent(rev) == nullrev
1729 return self.deltaparent(rev) == nullrev
1731 elif util.safehasattr(self.index, b'issnapshot'):
1730 elif util.safehasattr(self.index, b'issnapshot'):
1732 # assign the method directly so the feature test and attribute access are cached
1731 # assign the method directly so the feature test and attribute access are cached
1733 self.issnapshot = self.index.issnapshot
1732 self.issnapshot = self.index.issnapshot
1734 return self.issnapshot(rev)
1733 return self.issnapshot(rev)
1735 if rev == nullrev:
1734 if rev == nullrev:
1736 return True
1735 return True
1737 entry = self.index[rev]
1736 entry = self.index[rev]
1738 base = entry[3]
1737 base = entry[3]
1739 if base == rev:
1738 if base == rev:
1740 return True
1739 return True
1741 if base == nullrev:
1740 if base == nullrev:
1742 return True
1741 return True
1743 p1 = entry[5]
1742 p1 = entry[5]
1744 p2 = entry[6]
1743 p2 = entry[6]
1745 if base == p1 or base == p2:
1744 if base == p1 or base == p2:
1746 return False
1745 return False
1747 return self.issnapshot(base)
1746 return self.issnapshot(base)
1748
1747
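# Illustrative sketch (not part of revlog.py): under sparse-revlog the
# recursion above separates deltas against a parent (plain deltas) from
# deltas against another snapshot (intermediate snapshots).  A toy
# standalone version of the same test:

def _sketch_issnapshot(entries, rev):
    """entries: rev -> (base, p1, p2), -1 meaning nullrev."""
    if rev == -1:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == -1:
        return True
    if base == p1 or base == p2:
        return False
    return _sketch_issnapshot(entries, base)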
1749 def snapshotdepth(self, rev):
1748 def snapshotdepth(self, rev):
1750 """number of snapshots in the chain before this one"""
1749 """number of snapshots in the chain before this one"""
1751 if not self.issnapshot(rev):
1750 if not self.issnapshot(rev):
1752 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1751 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1753 return len(self._deltachain(rev)[0]) - 1
1752 return len(self._deltachain(rev)[0]) - 1
1754
1753
1755 def revdiff(self, rev1, rev2):
1754 def revdiff(self, rev1, rev2):
1756 """return or calculate a delta between two revisions
1755 """return or calculate a delta between two revisions
1757
1756
1758 The delta calculated is in binary form and is intended to be written to
1757 The delta calculated is in binary form and is intended to be written to
1759 revlog data directly. So this function needs raw revision data.
1758 revlog data directly. So this function needs raw revision data.
1760 """
1759 """
1761 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1760 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1762 return bytes(self._chunk(rev2))
1761 return bytes(self._chunk(rev2))
1763
1762
1764 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1763 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1765
1764
1766 def _processflags(self, text, flags, operation, raw=False):
1765 def _processflags(self, text, flags, operation, raw=False):
1767 """deprecated entry point to access flag processors"""
1766 """deprecated entry point to access flag processors"""
1768 msg = b'_processflags(...) use the specialized variant'
1767 msg = b'_processflags(...) use the specialized variant'
1769 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1768 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1770 if raw:
1769 if raw:
1771 return text, flagutil.processflagsraw(self, text, flags)
1770 return text, flagutil.processflagsraw(self, text, flags)
1772 elif operation == b'read':
1771 elif operation == b'read':
1773 return flagutil.processflagsread(self, text, flags)
1772 return flagutil.processflagsread(self, text, flags)
1774 else: # write operation
1773 else: # write operation
1775 return flagutil.processflagswrite(self, text, flags)
1774 return flagutil.processflagswrite(self, text, flags)
1776
1775
1777 def revision(self, nodeorrev, _df=None, raw=False):
1776 def revision(self, nodeorrev, _df=None, raw=False):
1778 """return an uncompressed revision of a given node or revision
1777 """return an uncompressed revision of a given node or revision
1779 number.
1778 number.
1780
1779
1781 _df - an existing file handle to read from. (internal-only)
1780 _df - an existing file handle to read from. (internal-only)
1782 raw - an optional argument specifying if the revision data is to be
1781 raw - an optional argument specifying if the revision data is to be
1783 treated as raw data when applying flag transforms. 'raw' should be set
1782 treated as raw data when applying flag transforms. 'raw' should be set
1784 to True when generating changegroups or in debug commands.
1783 to True when generating changegroups or in debug commands.
1785 """
1784 """
1786 if raw:
1785 if raw:
1787 msg = (
1786 msg = (
1788 b'revlog.revision(..., raw=True) is deprecated, '
1787 b'revlog.revision(..., raw=True) is deprecated, '
1789 b'use revlog.rawdata(...)'
1788 b'use revlog.rawdata(...)'
1790 )
1789 )
1791 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1790 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1792 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1791 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1793
1792
1794 def sidedata(self, nodeorrev, _df=None):
1793 def sidedata(self, nodeorrev, _df=None):
1795 """a map of extra data related to the changeset but not part of the hash
1794 """a map of extra data related to the changeset but not part of the hash
1796
1795
1797 This function currently returns a dictionary. However, a more
1796 This function currently returns a dictionary. However, a more
1798 advanced mapping object will likely be used in the future for
1797 advanced mapping object will likely be used in the future for
1799 more efficient/lazy code.
1798 more efficient/lazy code.
1800 """
1799 """
1801 return self._revisiondata(nodeorrev, _df)[1]
1800 return self._revisiondata(nodeorrev, _df)[1]
1802
1801
1803 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1802 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1804 # deal with <nodeorrev> argument type
1803 # deal with <nodeorrev> argument type
1805 if isinstance(nodeorrev, int):
1804 if isinstance(nodeorrev, int):
1806 rev = nodeorrev
1805 rev = nodeorrev
1807 node = self.node(rev)
1806 node = self.node(rev)
1808 else:
1807 else:
1809 node = nodeorrev
1808 node = nodeorrev
1810 rev = None
1809 rev = None
1811
1810
1812 # fast path the special `nullid` rev
1811 # fast path the special `nullid` rev
1813 if node == self.nullid:
1812 if node == self.nullid:
1814 return b"", {}
1813 return b"", {}
1815
1814
1816 # ``rawtext`` is the text as stored inside the revlog. Might be the
1815 # ``rawtext`` is the text as stored inside the revlog. Might be the
1817 # revision or might need to be processed to retrieve the revision.
1816 # revision or might need to be processed to retrieve the revision.
1818 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1817 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1819
1818
1820 if self.hassidedata:
1819 if self.hassidedata:
1821 if rev is None:
1820 if rev is None:
1822 rev = self.rev(node)
1821 rev = self.rev(node)
1823 sidedata = self._sidedata(rev)
1822 sidedata = self._sidedata(rev)
1824 else:
1823 else:
1825 sidedata = {}
1824 sidedata = {}
1826
1825
1827 if raw and validated:
1826 if raw and validated:
1828 # if we don't want to process the raw text and that raw
1827 # if we don't want to process the raw text and that raw
1829 # text is cached, we can exit early.
1828 # text is cached, we can exit early.
1830 return rawtext, sidedata
1829 return rawtext, sidedata
1831 if rev is None:
1830 if rev is None:
1832 rev = self.rev(node)
1831 rev = self.rev(node)
1833 # the revlog's flag for this revision
1832 # the revlog's flag for this revision
1834 # (usually alter its state or content)
1833 # (usually alter its state or content)
1835 flags = self.flags(rev)
1834 flags = self.flags(rev)
1836
1835
1837 if validated and flags == REVIDX_DEFAULT_FLAGS:
1836 if validated and flags == REVIDX_DEFAULT_FLAGS:
1838 # no extra flags set, no flag processor runs, text = rawtext
1837 # no extra flags set, no flag processor runs, text = rawtext
1839 return rawtext, sidedata
1838 return rawtext, sidedata
1840
1839
1841 if raw:
1840 if raw:
1842 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1841 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1843 text = rawtext
1842 text = rawtext
1844 else:
1843 else:
1845 r = flagutil.processflagsread(self, rawtext, flags)
1844 r = flagutil.processflagsread(self, rawtext, flags)
1846 text, validatehash = r
1845 text, validatehash = r
1847 if validatehash:
1846 if validatehash:
1848 self.checkhash(text, node, rev=rev)
1847 self.checkhash(text, node, rev=rev)
1849 if not validated:
1848 if not validated:
1850 self._revisioncache = (node, rev, rawtext)
1849 self._revisioncache = (node, rev, rawtext)
1851
1850
1852 return text, sidedata
1851 return text, sidedata
1853
1852
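# Hedged sketch of the raw/non-raw dispatch performed in _revisiondata(),
# written as a hypothetical standalone helper. `rl`, `rawtext` and `flags`
# are assumed inputs; the flagutil calls mirror the ones used above.
def _example_apply_flags(rl, rawtext, flags, raw=False):
    if raw:
        # raw mode: keep the stored bytes, only learn whether to verify the hash
        validatehash = flagutil.processflagsraw(rl, rawtext, flags)
        return rawtext, validatehash
    # read mode: flag processors may transform the stored bytes
    text, validatehash = flagutil.processflagsread(rl, rawtext, flags)
    return text, validatehash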
1854 def _rawtext(self, node, rev, _df=None):
1853 def _rawtext(self, node, rev, _df=None):
1855 """return the possibly unvalidated rawtext for a revision
1854 """return the possibly unvalidated rawtext for a revision
1856
1855
1857 returns (rev, rawtext, validated)
1856 returns (rev, rawtext, validated)
1858 """
1857 """
1859
1858
1860 # revision in the cache (could be useful to apply delta)
1859 # revision in the cache (could be useful to apply delta)
1861 cachedrev = None
1860 cachedrev = None
1862 # An intermediate text to apply deltas to
1861 # An intermediate text to apply deltas to
1863 basetext = None
1862 basetext = None
1864
1863
1865 # Check if we have the entry in cache
1864 # Check if we have the entry in cache
1866 # The cache entry looks like (node, rev, rawtext)
1865 # The cache entry looks like (node, rev, rawtext)
1867 if self._revisioncache:
1866 if self._revisioncache:
1868 if self._revisioncache[0] == node:
1867 if self._revisioncache[0] == node:
1869 return (rev, self._revisioncache[2], True)
1868 return (rev, self._revisioncache[2], True)
1870 cachedrev = self._revisioncache[1]
1869 cachedrev = self._revisioncache[1]
1871
1870
1872 if rev is None:
1871 if rev is None:
1873 rev = self.rev(node)
1872 rev = self.rev(node)
1874
1873
1875 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1874 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1876 if stopped:
1875 if stopped:
1877 basetext = self._revisioncache[2]
1876 basetext = self._revisioncache[2]
1878
1877
1879 # drop cache to save memory, the caller is expected to
1878 # drop cache to save memory, the caller is expected to
1880 # update self._revisioncache after validating the text
1879 # update self._revisioncache after validating the text
1881 self._revisioncache = None
1880 self._revisioncache = None
1882
1881
1883 targetsize = None
1882 targetsize = None
1884 rawsize = self.index[rev][2]
1883 rawsize = self.index[rev][2]
1885 if 0 <= rawsize:
1884 if 0 <= rawsize:
1886 targetsize = 4 * rawsize
1885 targetsize = 4 * rawsize
1887
1886
1888 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1887 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1889 if basetext is None:
1888 if basetext is None:
1890 basetext = bytes(bins[0])
1889 basetext = bytes(bins[0])
1891 bins = bins[1:]
1890 bins = bins[1:]
1892
1891
1893 rawtext = mdiff.patches(basetext, bins)
1892 rawtext = mdiff.patches(basetext, bins)
1894 del basetext # let us have a chance to free memory early
1893 del basetext # let us have a chance to free memory early
1895 return (rev, rawtext, False)
1894 return (rev, rawtext, False)
1896
1895
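# Hedged sketch of the delta-chain reconstruction done in _rawtext(): a full
# base text plus an ordered list of deltas is folded into the final raw text.
# Hypothetical helper; `basetext` and `deltas` are assumed to come from
# _chunks() over a delta chain, as above.
def _example_fold_chain(basetext, deltas):
    # mdiff.patches applies each delta in order on top of the base text
    return mdiff.patches(bytes(basetext), [bytes(d) for d in deltas])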
1897 def _sidedata(self, rev):
1896 def _sidedata(self, rev):
1898 """Return the sidedata for a given revision number."""
1897 """Return the sidedata for a given revision number."""
1899 index_entry = self.index[rev]
1898 index_entry = self.index[rev]
1900 sidedata_offset = index_entry[8]
1899 sidedata_offset = index_entry[8]
1901 sidedata_size = index_entry[9]
1900 sidedata_size = index_entry[9]
1902
1901
1903 if self._inline:
1902 if self._inline:
1904 sidedata_offset += self.index.entry_size * (1 + rev)
1903 sidedata_offset += self.index.entry_size * (1 + rev)
1905 if sidedata_size == 0:
1904 if sidedata_size == 0:
1906 return {}
1905 return {}
1907
1906
1908 segment = self._getsegment(sidedata_offset, sidedata_size)
1907 segment = self._getsegment(sidedata_offset, sidedata_size)
1909 sidedata = sidedatautil.deserialize_sidedata(segment)
1908 sidedata = sidedatautil.deserialize_sidedata(segment)
1910 return sidedata
1909 return sidedata
1911
1910
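# Hedged sketch: for inline revlogs the sidedata offset stored in the index is
# relative to the combined index+data stream, so the index entries interleaved
# before this revision's data have to be accounted for, as in _sidedata()
# above. Hypothetical helper; `stored_offset`, `rev` and `entry_size` are
# assumed inputs.
def _example_inline_sidedata_offset(stored_offset, rev, entry_size):
    # (1 + rev) index entries precede this revision's data in an inline file
    return stored_offset + entry_size * (1 + rev)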
1912 def rawdata(self, nodeorrev, _df=None):
1911 def rawdata(self, nodeorrev, _df=None):
1913 """return an uncompressed raw data of a given node or revision number.
1912 """return an uncompressed raw data of a given node or revision number.
1914
1913
1915 _df - an existing file handle to read from. (internal-only)
1914 _df - an existing file handle to read from. (internal-only)
1916 """
1915 """
1917 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1916 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1918
1917
1919 def hash(self, text, p1, p2):
1918 def hash(self, text, p1, p2):
1920 """Compute a node hash.
1919 """Compute a node hash.
1921
1920
1922 Available as a function so that subclasses can replace the hash
1921 Available as a function so that subclasses can replace the hash
1923 as needed.
1922 as needed.
1924 """
1923 """
1925 return storageutil.hashrevisionsha1(text, p1, p2)
1924 return storageutil.hashrevisionsha1(text, p1, p2)
1926
1925
1927 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1926 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1928 """Check node hash integrity.
1927 """Check node hash integrity.
1929
1928
1930 Available as a function so that subclasses can extend hash mismatch
1929 Available as a function so that subclasses can extend hash mismatch
1931 behaviors as needed.
1930 behaviors as needed.
1932 """
1931 """
1933 try:
1932 try:
1934 if p1 is None and p2 is None:
1933 if p1 is None and p2 is None:
1935 p1, p2 = self.parents(node)
1934 p1, p2 = self.parents(node)
1936 if node != self.hash(text, p1, p2):
1935 if node != self.hash(text, p1, p2):
1937 # Clear the revision cache on hash failure. The revision cache
1936 # Clear the revision cache on hash failure. The revision cache
1938 # only stores the raw revision and clearing the cache does have
1937 # only stores the raw revision and clearing the cache does have
1939 # the side-effect that we won't have a cache hit when the raw
1938 # the side-effect that we won't have a cache hit when the raw
1940 # revision data is accessed. But this case should be rare and
1939 # revision data is accessed. But this case should be rare and
1941 # it is extra work to teach the cache about the hash
1940 # it is extra work to teach the cache about the hash
1942 # verification state.
1941 # verification state.
1943 if self._revisioncache and self._revisioncache[0] == node:
1942 if self._revisioncache and self._revisioncache[0] == node:
1944 self._revisioncache = None
1943 self._revisioncache = None
1945
1944
1946 revornode = rev
1945 revornode = rev
1947 if revornode is None:
1946 if revornode is None:
1948 revornode = templatefilters.short(hex(node))
1947 revornode = templatefilters.short(hex(node))
1949 raise error.RevlogError(
1948 raise error.RevlogError(
1950 _(b"integrity check failed on %s:%s")
1949 _(b"integrity check failed on %s:%s")
1951 % (self.display_id, pycompat.bytestr(revornode))
1950 % (self.display_id, pycompat.bytestr(revornode))
1952 )
1951 )
1953 except error.RevlogError:
1952 except error.RevlogError:
1954 if self._censorable and storageutil.iscensoredtext(text):
1953 if self._censorable and storageutil.iscensoredtext(text):
1955 raise error.CensoredNodeError(self.display_id, node, text)
1954 raise error.CensoredNodeError(self.display_id, node, text)
1956 raise
1955 raise
1957
1956
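# Hedged sketch of what hash()/checkhash() verify: for the default sha1
# backend, the node id is a digest over both parents and the revision text.
# Hypothetical helper; it approximates storageutil.hashrevisionsha1 and is not
# a drop-in replacement for it.
import hashlib

def _example_node_hash(text, p1, p2):
    # parents are combined in a fixed (sorted) order so the hash is symmetric
    a, b = sorted([p1, p2])
    return hashlib.sha1(a + b + text).digest()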
1958 def _enforceinlinesize(self, tr, fp=None):
1957 def _enforceinlinesize(self, tr, fp=None):
1959 """Check if the revlog is too big for inline and convert if so.
1958 """Check if the revlog is too big for inline and convert if so.
1960
1959
1961 This should be called after revisions are added to the revlog. If the
1960 This should be called after revisions are added to the revlog. If the
1962 revlog has grown too large to be an inline revlog, it will convert it
1961 revlog has grown too large to be an inline revlog, it will convert it
1963 to use multiple index and data files.
1962 to use multiple index and data files.
1964 """
1963 """
1965 tiprev = len(self) - 1
1964 tiprev = len(self) - 1
1966 total_size = self.start(tiprev) + self.length(tiprev)
1965 total_size = self.start(tiprev) + self.length(tiprev)
1967 if not self._inline or total_size < _maxinline:
1966 if not self._inline or total_size < _maxinline:
1968 return
1967 return
1969
1968
1970 troffset = tr.findoffset(self._indexfile)
1969 troffset = tr.findoffset(self._indexfile)
1971 if troffset is None:
1970 if troffset is None:
1972 raise error.RevlogError(
1971 raise error.RevlogError(
1973 _(b"%s not found in the transaction") % self._indexfile
1972 _(b"%s not found in the transaction") % self._indexfile
1974 )
1973 )
1975 trindex = 0
1974 trindex = 0
1976 tr.add(self._datafile, 0)
1975 tr.add(self._datafile, 0)
1977
1976
1978 if fp:
1977 if fp:
1979 fp.flush()
1978 fp.flush()
1980 fp.close()
1979 fp.close()
1981 # We can't use the cached file handle after close(). So prevent
1980 # We can't use the cached file handle after close(). So prevent
1982 # its usage.
1981 # its usage.
1983 self._writinghandles = None
1982 self._writinghandles = None
1984
1983
1985 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1984 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1986 for r in self:
1985 for r in self:
1987 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1986 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1988 if troffset <= self.start(r):
1987 if troffset <= self.start(r):
1989 trindex = r
1988 trindex = r
1990
1989
1991 with self._indexfp(b'w') as fp:
1990 with self._indexfp(b'w') as fp:
1992 self._format_flags &= ~FLAG_INLINE_DATA
1991 self._format_flags &= ~FLAG_INLINE_DATA
1993 self._inline = False
1992 self._inline = False
1994 for i in self:
1993 for i in self:
1995 e = self.index.entry_binary(i)
1994 e = self.index.entry_binary(i)
1996 if i == 0:
1995 if i == 0:
1997 header = self._format_flags | self._format_version
1996 header = self._format_flags | self._format_version
1998 header = self.index.pack_header(header)
1997 header = self.index.pack_header(header)
1999 e = header + e
1998 e = header + e
2000 fp.write(e)
1999 fp.write(e)
2001
2000
2002 # the temp file replaces the real index when we exit the context
2001 # the temp file replaces the real index when we exit the context
2003 # manager
2002 # manager
2004
2003
2005 tr.replace(self._indexfile, trindex * self.index.entry_size)
2004 tr.replace(self._indexfile, trindex * self.index.entry_size)
2006 nodemaputil.setup_persistent_nodemap(tr, self)
2005 nodemaputil.setup_persistent_nodemap(tr, self)
2007 self._chunkclear()
2006 self._chunkclear()
2008
2007
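# Hedged sketch of the size check driving _enforceinlinesize(): an inline
# revlog keeps index and data in a single file until the accumulated data
# grows past a threshold (_maxinline above). Hypothetical helper; `rl` is an
# assumed revlog instance and `maxinline` the threshold in bytes.
def _example_needs_split(rl, maxinline):
    tiprev = len(rl) - 1
    total_size = rl.start(tiprev) + rl.length(tiprev)
    return rl._inline and total_size >= maxinline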
2009 def _nodeduplicatecallback(self, transaction, node):
2008 def _nodeduplicatecallback(self, transaction, node):
2010 """called when trying to add a node already stored."""
2009 """called when trying to add a node already stored."""
2011
2010
2012 def addrevision(
2011 def addrevision(
2013 self,
2012 self,
2014 text,
2013 text,
2015 transaction,
2014 transaction,
2016 link,
2015 link,
2017 p1,
2016 p1,
2018 p2,
2017 p2,
2019 cachedelta=None,
2018 cachedelta=None,
2020 node=None,
2019 node=None,
2021 flags=REVIDX_DEFAULT_FLAGS,
2020 flags=REVIDX_DEFAULT_FLAGS,
2022 deltacomputer=None,
2021 deltacomputer=None,
2023 sidedata=None,
2022 sidedata=None,
2024 ):
2023 ):
2025 """add a revision to the log
2024 """add a revision to the log
2026
2025
2027 text - the revision data to add
2026 text - the revision data to add
2028 transaction - the transaction object used for rollback
2027 transaction - the transaction object used for rollback
2029 link - the linkrev data to add
2028 link - the linkrev data to add
2030 p1, p2 - the parent nodeids of the revision
2029 p1, p2 - the parent nodeids of the revision
2031 cachedelta - an optional precomputed delta
2030 cachedelta - an optional precomputed delta
2032 node - nodeid of revision; typically node is not specified, and it is
2031 node - nodeid of revision; typically node is not specified, and it is
2033 computed by default as hash(text, p1, p2), however subclasses might
2032 computed by default as hash(text, p1, p2), however subclasses might
2034 use different hashing method (and override checkhash() in such case)
2033 use different hashing method (and override checkhash() in such case)
2035 flags - the known flags to set on the revision
2034 flags - the known flags to set on the revision
2036 deltacomputer - an optional deltacomputer instance shared between
2035 deltacomputer - an optional deltacomputer instance shared between
2037 multiple calls
2036 multiple calls
2038 """
2037 """
2039 if link == nullrev:
2038 if link == nullrev:
2040 raise error.RevlogError(
2039 raise error.RevlogError(
2041 _(b"attempted to add linkrev -1 to %s") % self.display_id
2040 _(b"attempted to add linkrev -1 to %s") % self.display_id
2042 )
2041 )
2043
2042
2044 if sidedata is None:
2043 if sidedata is None:
2045 sidedata = {}
2044 sidedata = {}
2046 elif sidedata and not self.hassidedata:
2045 elif sidedata and not self.hassidedata:
2047 raise error.ProgrammingError(
2046 raise error.ProgrammingError(
2048 _(b"trying to add sidedata to a revlog who don't support them")
2047 _(b"trying to add sidedata to a revlog who don't support them")
2049 )
2048 )
2050
2049
2051 if flags:
2050 if flags:
2052 node = node or self.hash(text, p1, p2)
2051 node = node or self.hash(text, p1, p2)
2053
2052
2054 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2053 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2055
2054
2056 # If the flag processor modifies the revision data, ignore any provided
2055 # If the flag processor modifies the revision data, ignore any provided
2057 # cachedelta.
2056 # cachedelta.
2058 if rawtext != text:
2057 if rawtext != text:
2059 cachedelta = None
2058 cachedelta = None
2060
2059
2061 if len(rawtext) > _maxentrysize:
2060 if len(rawtext) > _maxentrysize:
2062 raise error.RevlogError(
2061 raise error.RevlogError(
2063 _(
2062 _(
2064 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2063 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2065 )
2064 )
2066 % (self.display_id, len(rawtext))
2065 % (self.display_id, len(rawtext))
2067 )
2066 )
2068
2067
2069 node = node or self.hash(rawtext, p1, p2)
2068 node = node or self.hash(rawtext, p1, p2)
2070 rev = self.index.get_rev(node)
2069 rev = self.index.get_rev(node)
2071 if rev is not None:
2070 if rev is not None:
2072 return rev
2071 return rev
2073
2072
2074 if validatehash:
2073 if validatehash:
2075 self.checkhash(rawtext, node, p1=p1, p2=p2)
2074 self.checkhash(rawtext, node, p1=p1, p2=p2)
2076
2075
2077 return self.addrawrevision(
2076 return self.addrawrevision(
2078 rawtext,
2077 rawtext,
2079 transaction,
2078 transaction,
2080 link,
2079 link,
2081 p1,
2080 p1,
2082 p2,
2081 p2,
2083 node,
2082 node,
2084 flags,
2083 flags,
2085 cachedelta=cachedelta,
2084 cachedelta=cachedelta,
2086 deltacomputer=deltacomputer,
2085 deltacomputer=deltacomputer,
2087 sidedata=sidedata,
2086 sidedata=sidedata,
2088 )
2087 )
2089
2088
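# Illustrative sketch (hypothetical helper): adding a new revision with the
# API documented above. `rl` is an assumed revlog, `tr` an open transaction,
# `linkrev` the changelog revision this entry will be linked to, and `p1`/`p2`
# the parent node ids.
def _example_add(rl, tr, text, linkrev, p1, p2):
    # addrevision returns the revision number of the new (or already present)
    # entry, as _addrevision()/addrawrevision() below do
    rev = rl.addrevision(text, tr, linkrev, p1, p2)
    return rl.node(rev)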
2090 def addrawrevision(
2089 def addrawrevision(
2091 self,
2090 self,
2092 rawtext,
2091 rawtext,
2093 transaction,
2092 transaction,
2094 link,
2093 link,
2095 p1,
2094 p1,
2096 p2,
2095 p2,
2097 node,
2096 node,
2098 flags,
2097 flags,
2099 cachedelta=None,
2098 cachedelta=None,
2100 deltacomputer=None,
2099 deltacomputer=None,
2101 sidedata=None,
2100 sidedata=None,
2102 ):
2101 ):
2103 """add a raw revision with known flags, node and parents
2102 """add a raw revision with known flags, node and parents
2104 useful when reusing a revision not stored in this revlog (ex: received
2103 useful when reusing a revision not stored in this revlog (ex: received
2105 over the wire, or read from an external bundle).
2104 over the wire, or read from an external bundle).
2106 """
2105 """
2107 dfh = None
2106 dfh = None
2108 if not self._inline:
2107 if not self._inline:
2109 dfh = self._datafp(b"a+")
2108 dfh = self._datafp(b"a+")
2110 ifh = self._indexfp(b"a+")
2109 ifh = self._indexfp(b"a+")
2111 try:
2110 try:
2112 return self._addrevision(
2111 return self._addrevision(
2113 node,
2112 node,
2114 rawtext,
2113 rawtext,
2115 transaction,
2114 transaction,
2116 link,
2115 link,
2117 p1,
2116 p1,
2118 p2,
2117 p2,
2119 flags,
2118 flags,
2120 cachedelta,
2119 cachedelta,
2121 ifh,
2120 ifh,
2122 dfh,
2121 dfh,
2123 deltacomputer=deltacomputer,
2122 deltacomputer=deltacomputer,
2124 sidedata=sidedata,
2123 sidedata=sidedata,
2125 )
2124 )
2126 finally:
2125 finally:
2127 if dfh:
2126 if dfh:
2128 dfh.close()
2127 dfh.close()
2129 ifh.close()
2128 ifh.close()
2130
2129
2131 def compress(self, data):
2130 def compress(self, data):
2132 """Generate a possibly-compressed representation of data."""
2131 """Generate a possibly-compressed representation of data."""
2133 if not data:
2132 if not data:
2134 return b'', data
2133 return b'', data
2135
2134
2136 compressed = self._compressor.compress(data)
2135 compressed = self._compressor.compress(data)
2137
2136
2138 if compressed:
2137 if compressed:
2139 # The revlog compressor added the header in the returned data.
2138 # The revlog compressor added the header in the returned data.
2140 return b'', compressed
2139 return b'', compressed
2141
2140
2142 if data[0:1] == b'\0':
2141 if data[0:1] == b'\0':
2143 return b'', data
2142 return b'', data
2144 return b'u', data
2143 return b'u', data
2145
2144
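# Hedged sketch of the (header, data) convention used by compress(): an empty
# header means the payload is self-identifying (e.g. zlib's b'x' or a leading
# b'\0'), while b'u' marks data stored verbatim. Hypothetical helper showing
# how decompress() below consumes the concatenated result.
def _example_roundtrip(rl, data):
    header, payload = rl.compress(data)
    return rl.decompress(header + payload)  # expected to equal `data`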
2146 def decompress(self, data):
2145 def decompress(self, data):
2147 """Decompress a revlog chunk.
2146 """Decompress a revlog chunk.
2148
2147
2149 The chunk is expected to begin with a header identifying the
2148 The chunk is expected to begin with a header identifying the
2150 format type so it can be routed to an appropriate decompressor.
2149 format type so it can be routed to an appropriate decompressor.
2151 """
2150 """
2152 if not data:
2151 if not data:
2153 return data
2152 return data
2154
2153
2155 # Revlogs are read much more frequently than they are written and many
2154 # Revlogs are read much more frequently than they are written and many
2156 # chunks only take microseconds to decompress, so performance is
2155 # chunks only take microseconds to decompress, so performance is
2157 # important here.
2156 # important here.
2158 #
2157 #
2159 # We can make a few assumptions about revlogs:
2158 # We can make a few assumptions about revlogs:
2160 #
2159 #
2161 # 1) the majority of chunks will be compressed (as opposed to inline
2160 # 1) the majority of chunks will be compressed (as opposed to inline
2162 # raw data).
2161 # raw data).
2162 # 2) decompressing *any* data will likely be at least 10x slower than
2161 # 2) decompressing *any* data will likely be at least 10x slower than
2164 # returning raw inline data.
2163 # returning raw inline data.
2165 # 3) we want to prioritize common and officially supported compression
2164 # 3) we want to prioritize common and officially supported compression
2166 # engines
2165 # engines
2167 #
2166 #
2168 # It follows that we want to optimize for "decompress compressed data
2167 # It follows that we want to optimize for "decompress compressed data
2169 # when encoded with common and officially supported compression engines"
2168 # when encoded with common and officially supported compression engines"
2170 # case over "raw data" and "data encoded by less common or non-official
2169 # case over "raw data" and "data encoded by less common or non-official
2171 # compression engines." That is why we have the inline lookup first
2170 # compression engines." That is why we have the inline lookup first
2172 # followed by the compengines lookup.
2171 # followed by the compengines lookup.
2173 #
2172 #
2174 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2173 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2175 # compressed chunks. And this matters for changelog and manifest reads.
2174 # compressed chunks. And this matters for changelog and manifest reads.
2176 t = data[0:1]
2175 t = data[0:1]
2177
2176
2178 if t == b'x':
2177 if t == b'x':
2179 try:
2178 try:
2180 return _zlibdecompress(data)
2179 return _zlibdecompress(data)
2181 except zlib.error as e:
2180 except zlib.error as e:
2182 raise error.RevlogError(
2181 raise error.RevlogError(
2183 _(b'revlog decompress error: %s')
2182 _(b'revlog decompress error: %s')
2184 % stringutil.forcebytestr(e)
2183 % stringutil.forcebytestr(e)
2185 )
2184 )
2186 # '\0' is more common than 'u' so it goes first.
2185 # '\0' is more common than 'u' so it goes first.
2187 elif t == b'\0':
2186 elif t == b'\0':
2188 return data
2187 return data
2189 elif t == b'u':
2188 elif t == b'u':
2190 return util.buffer(data, 1)
2189 return util.buffer(data, 1)
2191
2190
2192 try:
2191 try:
2193 compressor = self._decompressors[t]
2192 compressor = self._decompressors[t]
2194 except KeyError:
2193 except KeyError:
2195 try:
2194 try:
2196 engine = util.compengines.forrevlogheader(t)
2195 engine = util.compengines.forrevlogheader(t)
2197 compressor = engine.revlogcompressor(self._compengineopts)
2196 compressor = engine.revlogcompressor(self._compengineopts)
2198 self._decompressors[t] = compressor
2197 self._decompressors[t] = compressor
2199 except KeyError:
2198 except KeyError:
2200 raise error.RevlogError(
2199 raise error.RevlogError(
2201 _(b'unknown compression type %s') % binascii.hexlify(t)
2200 _(b'unknown compression type %s') % binascii.hexlify(t)
2202 )
2201 )
2203
2202
2204 return compressor.decompress(data)
2203 return compressor.decompress(data)
2205
2204
2206 def _addrevision(
2205 def _addrevision(
2207 self,
2206 self,
2208 node,
2207 node,
2209 rawtext,
2208 rawtext,
2210 transaction,
2209 transaction,
2211 link,
2210 link,
2212 p1,
2211 p1,
2213 p2,
2212 p2,
2214 flags,
2213 flags,
2215 cachedelta,
2214 cachedelta,
2216 ifh,
2215 ifh,
2217 dfh,
2216 dfh,
2218 alwayscache=False,
2217 alwayscache=False,
2219 deltacomputer=None,
2218 deltacomputer=None,
2220 sidedata=None,
2219 sidedata=None,
2221 ):
2220 ):
2222 """internal function to add revisions to the log
2221 """internal function to add revisions to the log
2223
2222
2224 see addrevision for argument descriptions.
2223 see addrevision for argument descriptions.
2225
2224
2226 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2225 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2227
2226
2228 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2227 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2229 be used.
2228 be used.
2230
2229
2231 invariants:
2230 invariants:
2232 - rawtext is optional (can be None); if not set, cachedelta must be set.
2231 - rawtext is optional (can be None); if not set, cachedelta must be set.
2233 if both are set, they must correspond to each other.
2232 if both are set, they must correspond to each other.
2234 """
2233 """
2235 if node == self.nullid:
2234 if node == self.nullid:
2236 raise error.RevlogError(
2235 raise error.RevlogError(
2237 _(b"%s: attempt to add null revision") % self.display_id
2236 _(b"%s: attempt to add null revision") % self.display_id
2238 )
2237 )
2239 if (
2238 if (
2240 node == self.nodeconstants.wdirid
2239 node == self.nodeconstants.wdirid
2241 or node in self.nodeconstants.wdirfilenodeids
2240 or node in self.nodeconstants.wdirfilenodeids
2242 ):
2241 ):
2243 raise error.RevlogError(
2242 raise error.RevlogError(
2244 _(b"%s: attempt to add wdir revision") % self.display_id
2243 _(b"%s: attempt to add wdir revision") % self.display_id
2245 )
2244 )
2246
2245
2247 if self._inline:
2246 if self._inline:
2248 fh = ifh
2247 fh = ifh
2249 else:
2248 else:
2250 fh = dfh
2249 fh = dfh
2251
2250
2252 btext = [rawtext]
2251 btext = [rawtext]
2253
2252
2254 curr = len(self)
2253 curr = len(self)
2255 prev = curr - 1
2254 prev = curr - 1
2256
2255
2257 offset = self._get_data_offset(prev)
2256 offset = self._get_data_offset(prev)
2258
2257
2259 if self._concurrencychecker:
2258 if self._concurrencychecker:
2260 if self._inline:
2259 if self._inline:
2261 # offset is "as if" it were in the .d file, so we need to add on
2260 # offset is "as if" it were in the .d file, so we need to add on
2262 # the size of the entry metadata.
2261 # the size of the entry metadata.
2263 self._concurrencychecker(
2262 self._concurrencychecker(
2264 ifh, self._indexfile, offset + curr * self.index.entry_size
2263 ifh, self._indexfile, offset + curr * self.index.entry_size
2265 )
2264 )
2266 else:
2265 else:
2267 # Entries in the .i are a consistent size.
2266 # Entries in the .i are a consistent size.
2268 self._concurrencychecker(
2267 self._concurrencychecker(
2269 ifh, self._indexfile, curr * self.index.entry_size
2268 ifh, self._indexfile, curr * self.index.entry_size
2270 )
2269 )
2271 self._concurrencychecker(dfh, self._datafile, offset)
2270 self._concurrencychecker(dfh, self._datafile, offset)
2272
2271
2273 p1r, p2r = self.rev(p1), self.rev(p2)
2272 p1r, p2r = self.rev(p1), self.rev(p2)
2274
2273
2275 # full versions are inserted when the needed deltas
2274 # full versions are inserted when the needed deltas
2276 # become comparable to the uncompressed text
2275 # become comparable to the uncompressed text
2277 if rawtext is None:
2276 if rawtext is None:
2278 # need rawtext size, before changed by flag processors, which is
2277 # need rawtext size, before changed by flag processors, which is
2279 # the non-raw size. use revlog explicitly to avoid filelog's extra
2278 # the non-raw size. use revlog explicitly to avoid filelog's extra
2280 # logic that might remove metadata size.
2279 # logic that might remove metadata size.
2281 textlen = mdiff.patchedsize(
2280 textlen = mdiff.patchedsize(
2282 revlog.size(self, cachedelta[0]), cachedelta[1]
2281 revlog.size(self, cachedelta[0]), cachedelta[1]
2283 )
2282 )
2284 else:
2283 else:
2285 textlen = len(rawtext)
2284 textlen = len(rawtext)
2286
2285
2287 if deltacomputer is None:
2286 if deltacomputer is None:
2288 deltacomputer = deltautil.deltacomputer(self)
2287 deltacomputer = deltautil.deltacomputer(self)
2289
2288
2290 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2289 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2291
2290
2292 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2291 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2293
2292
2294 if sidedata and self.hassidedata:
2293 if sidedata and self.hassidedata:
2295 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2294 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2296 sidedata_offset = offset + deltainfo.deltalen
2295 sidedata_offset = offset + deltainfo.deltalen
2297 else:
2296 else:
2298 serialized_sidedata = b""
2297 serialized_sidedata = b""
2299 # Don't store the offset if the sidedata is empty, that way
2298 # Don't store the offset if the sidedata is empty, that way
2300 # we can easily detect empty sidedata and they will be no different
2299 # we can easily detect empty sidedata and they will be no different
2301 # than ones we manually add.
2300 # than ones we manually add.
2302 sidedata_offset = 0
2301 sidedata_offset = 0
2303
2302
2304 e = (
2303 e = (
2305 offset_type(offset, flags),
2304 offset_type(offset, flags),
2306 deltainfo.deltalen,
2305 deltainfo.deltalen,
2307 textlen,
2306 textlen,
2308 deltainfo.base,
2307 deltainfo.base,
2309 link,
2308 link,
2310 p1r,
2309 p1r,
2311 p2r,
2310 p2r,
2312 node,
2311 node,
2313 sidedata_offset,
2312 sidedata_offset,
2314 len(serialized_sidedata),
2313 len(serialized_sidedata),
2315 )
2314 )
2316
2315
2317 self.index.append(e)
2316 self.index.append(e)
2318 entry = self.index.entry_binary(curr)
2317 entry = self.index.entry_binary(curr)
2319 if curr == 0:
2318 if curr == 0:
2320 header = self._format_flags | self._format_version
2319 header = self._format_flags | self._format_version
2321 header = self.index.pack_header(header)
2320 header = self.index.pack_header(header)
2322 entry = header + entry
2321 entry = header + entry
2323 self._writeentry(
2322 self._writeentry(
2324 transaction,
2323 transaction,
2325 ifh,
2324 ifh,
2326 dfh,
2325 dfh,
2327 entry,
2326 entry,
2328 deltainfo.data,
2327 deltainfo.data,
2329 link,
2328 link,
2330 offset,
2329 offset,
2331 serialized_sidedata,
2330 serialized_sidedata,
2332 )
2331 )
2333
2332
2334 rawtext = btext[0]
2333 rawtext = btext[0]
2335
2334
2336 if alwayscache and rawtext is None:
2335 if alwayscache and rawtext is None:
2337 rawtext = deltacomputer.buildtext(revinfo, fh)
2336 rawtext = deltacomputer.buildtext(revinfo, fh)
2338
2337
2339 if type(rawtext) == bytes: # only accept immutable objects
2338 if type(rawtext) == bytes: # only accept immutable objects
2340 self._revisioncache = (node, curr, rawtext)
2339 self._revisioncache = (node, curr, rawtext)
2341 self._chainbasecache[curr] = deltainfo.chainbase
2340 self._chainbasecache[curr] = deltainfo.chainbase
2342 return curr
2341 return curr
2343
2342
2344 def _get_data_offset(self, prev):
2343 def _get_data_offset(self, prev):
2345 """Returns the current offset in the (in-transaction) data file.
2344 """Returns the current offset in the (in-transaction) data file.
2346 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2345 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2347 file to store that information: since sidedata can be rewritten to the
2346 file to store that information: since sidedata can be rewritten to the
2348 end of the data file within a transaction, you can have cases where, for
2347 end of the data file within a transaction, you can have cases where, for
2349 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2348 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2350 to `n - 1`'s sidedata being written after `n`'s data.
2349 to `n - 1`'s sidedata being written after `n`'s data.
2351
2350
2352 TODO cache this in a docket file before getting out of experimental."""
2351 TODO cache this in a docket file before getting out of experimental."""
2353 if self._format_version != REVLOGV2:
2352 if self._format_version != REVLOGV2:
2354 return self.end(prev)
2353 return self.end(prev)
2355
2354
2356 offset = 0
2355 offset = 0
2357 for rev, entry in enumerate(self.index):
2356 for rev, entry in enumerate(self.index):
2358 sidedata_end = entry[8] + entry[9]
2357 sidedata_end = entry[8] + entry[9]
2359 # Sidedata for a previous rev has potentially been written after
2358 # Sidedata for a previous rev has potentially been written after
2360 # this rev's end, so take the max.
2359 # this rev's end, so take the max.
2361 offset = max(self.end(rev), offset, sidedata_end)
2360 offset = max(self.end(rev), offset, sidedata_end)
2362 return offset
2361 return offset
2363
2362
2364 def _writeentry(
2363 def _writeentry(
2365 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2364 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2366 ):
2365 ):
2367 # Files opened in a+ mode have inconsistent behavior on various
2366 # Files opened in a+ mode have inconsistent behavior on various
2368 # platforms. Windows requires that a file positioning call be made
2367 # platforms. Windows requires that a file positioning call be made
2369 # when the file handle transitions between reads and writes. See
2368 # when the file handle transitions between reads and writes. See
2370 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2369 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2371 # platforms, Python or the platform itself can be buggy. Some versions
2370 # platforms, Python or the platform itself can be buggy. Some versions
2372 # of Solaris have been observed to not append at the end of the file
2371 # of Solaris have been observed to not append at the end of the file
2373 # if the file was seeked to before the end. See issue4943 for more.
2372 # if the file was seeked to before the end. See issue4943 for more.
2374 #
2373 #
2375 # We work around this issue by inserting a seek() before writing.
2374 # We work around this issue by inserting a seek() before writing.
2376 # Note: This is likely not necessary on Python 3. However, because
2375 # Note: This is likely not necessary on Python 3. However, because
2377 # the file handle is reused for reads and may be seeked there, we need
2376 # the file handle is reused for reads and may be seeked there, we need
2378 # to be careful before changing this.
2377 # to be careful before changing this.
2379 ifh.seek(0, os.SEEK_END)
2378 ifh.seek(0, os.SEEK_END)
2380 if dfh:
2379 if dfh:
2381 dfh.seek(0, os.SEEK_END)
2380 dfh.seek(0, os.SEEK_END)
2382
2381
2383 curr = len(self) - 1
2382 curr = len(self) - 1
2384 if not self._inline:
2383 if not self._inline:
2385 transaction.add(self._datafile, offset)
2384 transaction.add(self._datafile, offset)
2386 transaction.add(self._indexfile, curr * len(entry))
2385 transaction.add(self._indexfile, curr * len(entry))
2387 if data[0]:
2386 if data[0]:
2388 dfh.write(data[0])
2387 dfh.write(data[0])
2389 dfh.write(data[1])
2388 dfh.write(data[1])
2390 if sidedata:
2389 if sidedata:
2391 dfh.write(sidedata)
2390 dfh.write(sidedata)
2392 ifh.write(entry)
2391 ifh.write(entry)
2393 else:
2392 else:
2394 offset += curr * self.index.entry_size
2393 offset += curr * self.index.entry_size
2395 transaction.add(self._indexfile, offset)
2394 transaction.add(self._indexfile, offset)
2396 ifh.write(entry)
2395 ifh.write(entry)
2397 ifh.write(data[0])
2396 ifh.write(data[0])
2398 ifh.write(data[1])
2397 ifh.write(data[1])
2399 if sidedata:
2398 if sidedata:
2400 ifh.write(sidedata)
2399 ifh.write(sidedata)
2401 self._enforceinlinesize(transaction, ifh)
2400 self._enforceinlinesize(transaction, ifh)
2402 nodemaputil.setup_persistent_nodemap(transaction, self)
2401 nodemaputil.setup_persistent_nodemap(transaction, self)
2403
2402
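# Hedged sketch of the append pattern used in _writeentry(): handles opened in
# a+ mode are explicitly positioned at EOF before writing, to sidestep the
# platform quirks described above. Hypothetical helper; `fh` is an assumed
# file object opened with mode 'a+b'.
import os

def _example_append(fh, payload):
    fh.seek(0, os.SEEK_END)  # make the append position explicit
    fh.write(payload)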
2404 def addgroup(
2403 def addgroup(
2405 self,
2404 self,
2406 deltas,
2405 deltas,
2407 linkmapper,
2406 linkmapper,
2408 transaction,
2407 transaction,
2409 alwayscache=False,
2408 alwayscache=False,
2410 addrevisioncb=None,
2409 addrevisioncb=None,
2411 duplicaterevisioncb=None,
2410 duplicaterevisioncb=None,
2412 ):
2411 ):
2413 """
2412 """
2414 add a delta group
2413 add a delta group
2415
2414
2416 given a set of deltas, add them to the revision log. the
2415 given a set of deltas, add them to the revision log. the
2417 first delta is against its parent, which should be in our
2416 first delta is against its parent, which should be in our
2418 log, the rest are against the previous delta.
2417 log, the rest are against the previous delta.
2419
2418
2420 If ``addrevisioncb`` is defined, it will be called with arguments of
2419 If ``addrevisioncb`` is defined, it will be called with arguments of
2421 this revlog and the node that was added.
2420 this revlog and the node that was added.
2422 """
2421 """
2423
2422
2424 if self._writinghandles:
2423 if self._writinghandles:
2425 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2424 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2426
2425
2427 r = len(self)
2426 r = len(self)
2428 end = 0
2427 end = 0
2429 if r:
2428 if r:
2430 end = self.end(r - 1)
2429 end = self.end(r - 1)
2431 ifh = self._indexfp(b"a+")
2430 ifh = self._indexfp(b"a+")
2432 isize = r * self.index.entry_size
2431 isize = r * self.index.entry_size
2433 if self._inline:
2432 if self._inline:
2434 transaction.add(self._indexfile, end + isize)
2433 transaction.add(self._indexfile, end + isize)
2435 dfh = None
2434 dfh = None
2436 else:
2435 else:
2437 transaction.add(self._indexfile, isize)
2436 transaction.add(self._indexfile, isize)
2438 transaction.add(self._datafile, end)
2437 transaction.add(self._datafile, end)
2439 dfh = self._datafp(b"a+")
2438 dfh = self._datafp(b"a+")
2440
2439
2441 self._writinghandles = (ifh, dfh)
2440 self._writinghandles = (ifh, dfh)
2442 empty = True
2441 empty = True
2443
2442
2444 try:
2443 try:
2445 deltacomputer = deltautil.deltacomputer(self)
2444 deltacomputer = deltautil.deltacomputer(self)
2446 # loop through our set of deltas
2445 # loop through our set of deltas
2447 for data in deltas:
2446 for data in deltas:
2448 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2447 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2449 link = linkmapper(linknode)
2448 link = linkmapper(linknode)
2450 flags = flags or REVIDX_DEFAULT_FLAGS
2449 flags = flags or REVIDX_DEFAULT_FLAGS
2451
2450
2452 rev = self.index.get_rev(node)
2451 rev = self.index.get_rev(node)
2453 if rev is not None:
2452 if rev is not None:
2454 # this can happen if two branches make the same change
2453 # this can happen if two branches make the same change
2455 self._nodeduplicatecallback(transaction, rev)
2454 self._nodeduplicatecallback(transaction, rev)
2456 if duplicaterevisioncb:
2455 if duplicaterevisioncb:
2457 duplicaterevisioncb(self, rev)
2456 duplicaterevisioncb(self, rev)
2458 empty = False
2457 empty = False
2459 continue
2458 continue
2460
2459
2461 for p in (p1, p2):
2460 for p in (p1, p2):
2462 if not self.index.has_node(p):
2461 if not self.index.has_node(p):
2463 raise error.LookupError(
2462 raise error.LookupError(
2464 p, self.radix, _(b'unknown parent')
2463 p, self.radix, _(b'unknown parent')
2465 )
2464 )
2466
2465
2467 if not self.index.has_node(deltabase):
2466 if not self.index.has_node(deltabase):
2468 raise error.LookupError(
2467 raise error.LookupError(
2469 deltabase, self.display_id, _(b'unknown delta base')
2468 deltabase, self.display_id, _(b'unknown delta base')
2470 )
2469 )
2471
2470
2472 baserev = self.rev(deltabase)
2471 baserev = self.rev(deltabase)
2473
2472
2474 if baserev != nullrev and self.iscensored(baserev):
2473 if baserev != nullrev and self.iscensored(baserev):
2475 # if base is censored, delta must be full replacement in a
2474 # if base is censored, delta must be full replacement in a
2476 # single patch operation
2475 # single patch operation
2477 hlen = struct.calcsize(b">lll")
2476 hlen = struct.calcsize(b">lll")
2478 oldlen = self.rawsize(baserev)
2477 oldlen = self.rawsize(baserev)
2479 newlen = len(delta) - hlen
2478 newlen = len(delta) - hlen
2480 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2479 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2481 raise error.CensoredBaseError(
2480 raise error.CensoredBaseError(
2482 self.display_id, self.node(baserev)
2481 self.display_id, self.node(baserev)
2483 )
2482 )
2484
2483
2485 if not flags and self._peek_iscensored(baserev, delta):
2484 if not flags and self._peek_iscensored(baserev, delta):
2486 flags |= REVIDX_ISCENSORED
2485 flags |= REVIDX_ISCENSORED
2487
2486
2488 # We assume consumers of addrevisioncb will want to retrieve
2487 # We assume consumers of addrevisioncb will want to retrieve
2489 # the added revision, which will require a call to
2488 # the added revision, which will require a call to
2490 # revision(). revision() will fast path if there is a cache
2489 # revision(). revision() will fast path if there is a cache
2491 # hit. So, we tell _addrevision() to always cache in this case.
2490 # hit. So, we tell _addrevision() to always cache in this case.
2492 # We're only using addgroup() in the context of changegroup
2491 # We're only using addgroup() in the context of changegroup
2493 # generation so the revision data can always be handled as raw
2492 # generation so the revision data can always be handled as raw
2494 # by the flagprocessor.
2493 # by the flagprocessor.
2495 rev = self._addrevision(
2494 rev = self._addrevision(
2496 node,
2495 node,
2497 None,
2496 None,
2498 transaction,
2497 transaction,
2499 link,
2498 link,
2500 p1,
2499 p1,
2501 p2,
2500 p2,
2502 flags,
2501 flags,
2503 (baserev, delta),
2502 (baserev, delta),
2504 ifh,
2503 ifh,
2505 dfh,
2504 dfh,
2506 alwayscache=alwayscache,
2505 alwayscache=alwayscache,
2507 deltacomputer=deltacomputer,
2506 deltacomputer=deltacomputer,
2508 sidedata=sidedata,
2507 sidedata=sidedata,
2509 )
2508 )
2510
2509
2511 if addrevisioncb:
2510 if addrevisioncb:
2512 addrevisioncb(self, rev)
2511 addrevisioncb(self, rev)
2513 empty = False
2512 empty = False
2514
2513
2515 if not dfh and not self._inline:
2514 if not dfh and not self._inline:
2516 # addrevision switched from inline to conventional
2515 # addrevision switched from inline to conventional
2517 # reopen the index
2516 # reopen the index
2518 ifh.close()
2517 ifh.close()
2519 dfh = self._datafp(b"a+")
2518 dfh = self._datafp(b"a+")
2520 ifh = self._indexfp(b"a+")
2519 ifh = self._indexfp(b"a+")
2521 self._writinghandles = (ifh, dfh)
2520 self._writinghandles = (ifh, dfh)
2522 finally:
2521 finally:
2523 self._writinghandles = None
2522 self._writinghandles = None
2524
2523
2525 if dfh:
2524 if dfh:
2526 dfh.close()
2525 dfh.close()
2527 ifh.close()
2526 ifh.close()
2528 return not empty
2527 return not empty
2529
2528
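# Hedged sketch of the shape addgroup() expects for each element of `deltas`,
# matching the tuple unpacked in the loop above. Everything here is a
# hypothetical illustration, not a real changegroup producer.
def _example_delta_entry(node, p1, p2, linknode, deltabase, delta):
    flags = REVIDX_DEFAULT_FLAGS
    sidedata = {}
    return (node, p1, p2, linknode, deltabase, delta, flags, sidedata)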
2530 def iscensored(self, rev):
2529 def iscensored(self, rev):
2531 """Check if a file revision is censored."""
2530 """Check if a file revision is censored."""
2532 if not self._censorable:
2531 if not self._censorable:
2533 return False
2532 return False
2534
2533
2535 return self.flags(rev) & REVIDX_ISCENSORED
2534 return self.flags(rev) & REVIDX_ISCENSORED
2536
2535
2537 def _peek_iscensored(self, baserev, delta):
2536 def _peek_iscensored(self, baserev, delta):
2538 """Quickly check if a delta produces a censored revision."""
2537 """Quickly check if a delta produces a censored revision."""
2539 if not self._censorable:
2538 if not self._censorable:
2540 return False
2539 return False
2541
2540
2542 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2541 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2543
2542
2544 def getstrippoint(self, minlink):
2543 def getstrippoint(self, minlink):
2545 """find the minimum rev that must be stripped to strip the linkrev
2544 """find the minimum rev that must be stripped to strip the linkrev
2546
2545
2547 Returns a tuple containing the minimum rev and a set of all revs that
2546 Returns a tuple containing the minimum rev and a set of all revs that
2548 have linkrevs that will be broken by this strip.
2547 have linkrevs that will be broken by this strip.
2549 """
2548 """
2550 return storageutil.resolvestripinfo(
2549 return storageutil.resolvestripinfo(
2551 minlink,
2550 minlink,
2552 len(self) - 1,
2551 len(self) - 1,
2553 self.headrevs(),
2552 self.headrevs(),
2554 self.linkrev,
2553 self.linkrev,
2555 self.parentrevs,
2554 self.parentrevs,
2556 )
2555 )
2557
2556
2558 def strip(self, minlink, transaction):
2557 def strip(self, minlink, transaction):
2559 """truncate the revlog on the first revision with a linkrev >= minlink
2558 """truncate the revlog on the first revision with a linkrev >= minlink
2560
2559
2561 This function is called when we're stripping revision minlink and
2560 This function is called when we're stripping revision minlink and
2562 its descendants from the repository.
2561 its descendants from the repository.
2563
2562
2564 We have to remove all revisions with linkrev >= minlink, because
2563 We have to remove all revisions with linkrev >= minlink, because
2565 the equivalent changelog revisions will be renumbered after the
2564 the equivalent changelog revisions will be renumbered after the
2566 strip.
2565 strip.
2567
2566
2568 So we truncate the revlog on the first of these revisions, and
2567 So we truncate the revlog on the first of these revisions, and
2569 trust that the caller has saved the revisions that shouldn't be
2568 trust that the caller has saved the revisions that shouldn't be
2570 removed and that it'll re-add them after this truncation.
2569 removed and that it'll re-add them after this truncation.
2571 """
2570 """
2572 if len(self) == 0:
2571 if len(self) == 0:
2573 return
2572 return
2574
2573
2575 rev, _ = self.getstrippoint(minlink)
2574 rev, _ = self.getstrippoint(minlink)
2576 if rev == len(self):
2575 if rev == len(self):
2577 return
2576 return
2578
2577
2579 # first truncate the files on disk
2578 # first truncate the files on disk
2580 end = self.start(rev)
2579 end = self.start(rev)
2581 if not self._inline:
2580 if not self._inline:
2582 transaction.add(self._datafile, end)
2581 transaction.add(self._datafile, end)
2583 end = rev * self.index.entry_size
2582 end = rev * self.index.entry_size
2584 else:
2583 else:
2585 end += rev * self.index.entry_size
2584 end += rev * self.index.entry_size
2586
2585
2587 transaction.add(self._indexfile, end)
2586 transaction.add(self._indexfile, end)
2588
2587
2589 # then reset internal state in memory to forget those revisions
2588 # then reset internal state in memory to forget those revisions
2590 self._revisioncache = None
2589 self._revisioncache = None
2591 self._chaininfocache = util.lrucachedict(500)
2590 self._chaininfocache = util.lrucachedict(500)
2592 self._chunkclear()
2591 self._chunkclear()
2593
2592
2594 del self.index[rev:-1]
2593 del self.index[rev:-1]
2595
2594
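# Illustrative sketch (hypothetical helper): combining getstrippoint() and
# strip(), per the docstrings above. `rl` is an assumed revlog and `tr` an
# open transaction.
def _example_strip(rl, tr, minlink):
    striprev, brokenrevs = rl.getstrippoint(minlink)
    if striprev < len(rl):
        rl.strip(minlink, tr)
    return brokenrevs  # revs whose linkrevs the strip will break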
2596 def checksize(self):
2595 def checksize(self):
2597 """Check size of index and data files
2596 """Check size of index and data files
2598
2597
2599 return a (dd, di) tuple.
2598 return a (dd, di) tuple.
2600 - dd: extra bytes for the "data" file
2599 - dd: extra bytes for the "data" file
2601 - di: extra bytes for the "index" file
2600 - di: extra bytes for the "index" file
2602
2601
2603 A healthy revlog will return (0, 0).
2602 A healthy revlog will return (0, 0).
2604 """
2603 """
2605 expected = 0
2604 expected = 0
2606 if len(self):
2605 if len(self):
2607 expected = max(0, self.end(len(self) - 1))
2606 expected = max(0, self.end(len(self) - 1))
2608
2607
2609 try:
2608 try:
2610 with self._datafp() as f:
2609 with self._datafp() as f:
2611 f.seek(0, io.SEEK_END)
2610 f.seek(0, io.SEEK_END)
2612 actual = f.tell()
2611 actual = f.tell()
2613 dd = actual - expected
2612 dd = actual - expected
2614 except IOError as inst:
2613 except IOError as inst:
2615 if inst.errno != errno.ENOENT:
2614 if inst.errno != errno.ENOENT:
2616 raise
2615 raise
2617 dd = 0
2616 dd = 0
2618
2617
2619 try:
2618 try:
2620 f = self.opener(self._indexfile)
2619 f = self.opener(self._indexfile)
2621 f.seek(0, io.SEEK_END)
2620 f.seek(0, io.SEEK_END)
2622 actual = f.tell()
2621 actual = f.tell()
2623 f.close()
2622 f.close()
2624 s = self.index.entry_size
2623 s = self.index.entry_size
2625 i = max(0, actual // s)
2624 i = max(0, actual // s)
2626 di = actual - (i * s)
2625 di = actual - (i * s)
2627 if self._inline:
2626 if self._inline:
2628 databytes = 0
2627 databytes = 0
2629 for r in self:
2628 for r in self:
2630 databytes += max(0, self.length(r))
2629 databytes += max(0, self.length(r))
2631 dd = 0
2630 dd = 0
2632 di = actual - len(self) * s - databytes
2631 di = actual - len(self) * s - databytes
2633 except IOError as inst:
2632 except IOError as inst:
2634 if inst.errno != errno.ENOENT:
2633 if inst.errno != errno.ENOENT:
2635 raise
2634 raise
2636 di = 0
2635 di = 0
2637
2636
2638 return (dd, di)
2637 return (dd, di)
2639
2638
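# Illustrative sketch (hypothetical helper): interpreting checksize(), whose
# docstring above promises (0, 0) for a healthy revlog.
def _example_is_consistent(rl):
    dd, di = rl.checksize()  # extra bytes in the data / index file
    return dd == 0 and di == 0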
2640 def files(self):
2639 def files(self):
2641 res = [self._indexfile]
2640 res = [self._indexfile]
2642 if not self._inline:
2641 if not self._inline:
2643 res.append(self._datafile)
2642 res.append(self._datafile)
2644 return res
2643 return res
2645
2644
2646 def emitrevisions(
2645 def emitrevisions(
2647 self,
2646 self,
2648 nodes,
2647 nodes,
2649 nodesorder=None,
2648 nodesorder=None,
2650 revisiondata=False,
2649 revisiondata=False,
2651 assumehaveparentrevisions=False,
2650 assumehaveparentrevisions=False,
2652 deltamode=repository.CG_DELTAMODE_STD,
2651 deltamode=repository.CG_DELTAMODE_STD,
2653 sidedata_helpers=None,
2652 sidedata_helpers=None,
2654 ):
2653 ):
2655 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2654 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2656 raise error.ProgrammingError(
2655 raise error.ProgrammingError(
2657 b'unhandled value for nodesorder: %s' % nodesorder
2656 b'unhandled value for nodesorder: %s' % nodesorder
2658 )
2657 )
2659
2658
2660 if nodesorder is None and not self._generaldelta:
2659 if nodesorder is None and not self._generaldelta:
2661 nodesorder = b'storage'
2660 nodesorder = b'storage'
2662
2661
2663 if (
2662 if (
2664 not self._storedeltachains
2663 not self._storedeltachains
2665 and deltamode != repository.CG_DELTAMODE_PREV
2664 and deltamode != repository.CG_DELTAMODE_PREV
2666 ):
2665 ):
2667 deltamode = repository.CG_DELTAMODE_FULL
2666 deltamode = repository.CG_DELTAMODE_FULL
2668
2667
2669 return storageutil.emitrevisions(
2668 return storageutil.emitrevisions(
2670 self,
2669 self,
2671 nodes,
2670 nodes,
2672 nodesorder,
2671 nodesorder,
2673 revlogrevisiondelta,
2672 revlogrevisiondelta,
2674 deltaparentfn=self.deltaparent,
2673 deltaparentfn=self.deltaparent,
2675 candeltafn=self.candelta,
2674 candeltafn=self.candelta,
2676 rawsizefn=self.rawsize,
2675 rawsizefn=self.rawsize,
2677 revdifffn=self.revdiff,
2676 revdifffn=self.revdiff,
2678 flagsfn=self.flags,
2677 flagsfn=self.flags,
2679 deltamode=deltamode,
2678 deltamode=deltamode,
2680 revisiondata=revisiondata,
2679 revisiondata=revisiondata,
2681 assumehaveparentrevisions=assumehaveparentrevisions,
2680 assumehaveparentrevisions=assumehaveparentrevisions,
2682 sidedata_helpers=sidedata_helpers,
2681 sidedata_helpers=sidedata_helpers,
2683 )
2682 )
2684
2683
2685 DELTAREUSEALWAYS = b'always'
2684 DELTAREUSEALWAYS = b'always'
2686 DELTAREUSESAMEREVS = b'samerevs'
2685 DELTAREUSESAMEREVS = b'samerevs'
2687 DELTAREUSENEVER = b'never'
2686 DELTAREUSENEVER = b'never'
2688
2687
2689 DELTAREUSEFULLADD = b'fulladd'
2688 DELTAREUSEFULLADD = b'fulladd'
2690
2689
2691 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2690 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2692
2691
2693 def clone(
2692 def clone(
2694 self,
2693 self,
2695 tr,
2694 tr,
2696 destrevlog,
2695 destrevlog,
2697 addrevisioncb=None,
2696 addrevisioncb=None,
2698 deltareuse=DELTAREUSESAMEREVS,
2697 deltareuse=DELTAREUSESAMEREVS,
2699 forcedeltabothparents=None,
2698 forcedeltabothparents=None,
2700 sidedata_helpers=None,
2699 sidedata_helpers=None,
2701 ):
2700 ):
2702 """Copy this revlog to another, possibly with format changes.
2701 """Copy this revlog to another, possibly with format changes.
2703
2702
2704 The destination revlog will contain the same revisions and nodes.
2703 The destination revlog will contain the same revisions and nodes.
2705 However, it may not be bit-for-bit identical due to e.g. delta encoding
2704 However, it may not be bit-for-bit identical due to e.g. delta encoding
2706 differences.
2705 differences.
2707
2706
2707 The ``deltareuse`` argument controls how deltas from the existing revlog
2706 The ``deltareuse`` argument controls how deltas from the existing revlog
2709 are preserved in the destination revlog. The argument can have the
2708 are preserved in the destination revlog. The argument can have the
2710 following values:
2709 following values:
2711
2710
2712 DELTAREUSEALWAYS
2711 DELTAREUSEALWAYS
2713 Deltas will always be reused (if possible), even if the destination
2712 Deltas will always be reused (if possible), even if the destination
2714 revlog would not select the same revisions for the delta. This is the
2713 revlog would not select the same revisions for the delta. This is the
2715 fastest mode of operation.
2714 fastest mode of operation.
2716 DELTAREUSESAMEREVS
2715 DELTAREUSESAMEREVS
2717 Deltas will be reused if the destination revlog would pick the same
2716 Deltas will be reused if the destination revlog would pick the same
2718 revisions for the delta. This mode strikes a balance between speed
2717 revisions for the delta. This mode strikes a balance between speed
2719 and optimization.
2718 and optimization.
2720 DELTAREUSENEVER
2719 DELTAREUSENEVER
2721 Deltas will never be reused. This is the slowest mode of execution.
2720 Deltas will never be reused. This is the slowest mode of execution.
2722 This mode can be used to recompute deltas (e.g. if the diff/delta
2721 This mode can be used to recompute deltas (e.g. if the diff/delta
2723 algorithm changes).
2722 algorithm changes).
2724 DELTAREUSEFULLADD
2723 DELTAREUSEFULLADD
2724 Revisions will be re-added as if they were new content. This is
2723 Revisions will be re-added as if they were new content. This is
2725 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2724 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2726 e.g. large file detection and handling.
2725 e.g. large file detection and handling.
2728
2727
2729 Delta computation can be slow, so the choice of delta reuse policy can
2728 Delta computation can be slow, so the choice of delta reuse policy can
2730 significantly affect run time.
2729 significantly affect run time.
2731
2730
2732 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2731 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2733 two extremes. Deltas will be reused if they are appropriate. But if the
2732 two extremes. Deltas will be reused if they are appropriate. But if the
2734 delta could choose a better revision, it will do so. This means if you
2733 delta could choose a better revision, it will do so. This means if you
2735 are converting a non-generaldelta revlog to a generaldelta revlog,
2734 are converting a non-generaldelta revlog to a generaldelta revlog,
2736 deltas will be recomputed if the delta's parent isn't a parent of the
2735 deltas will be recomputed if the delta's parent isn't a parent of the
2737 revision.
2736 revision.
2738
2737
2739 In addition to the delta policy, the ``forcedeltabothparents``
2738 In addition to the delta policy, the ``forcedeltabothparents``
2740 argument controls whether to force compute deltas against both parents
2739 argument controls whether to force compute deltas against both parents
2741 for merges. By default, the current default is used.
2740 for merges. By default, the current default is used.
2742
2741
2743 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2742 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2744 `sidedata_helpers`.
2743 `sidedata_helpers`.
2745 """
2744 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

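            # The tuple accesses below rely on the index entry layout; the
            # summary here is inferred from this method and is a convenience
            # note, not an authoritative format description:
            #
            #     entry[0] -> offset (upper bits) and flags (lower 16 bits)
            #     entry[4] -> linkrev
            #     entry[5] -> p1 rev      entry[6] -> p2 rev
            #     entry[7] -> node id
            #
            # (entry[8] and entry[9], the sidedata offset and length, are used
            # by ``rewrite_sidedata`` further down.)
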
            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                ifh = destrevlog.opener(
                    destrevlog._indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog._datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

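    # A hedged usage sketch for ``censorrevision`` (illustrative only, not
    # part of this module): ``rl`` stands for a censorable revlog, ``tr`` for
    # an open transaction and ``bad_node`` for the node whose content must be
    # wiped; the names are placeholders.
    #
    #     rl.censorrevision(tr, bad_node, tombstone=b'removed by admin')
    #
    # The tombstone must be no longer than the censored data, and the
    # rewritten files only replace the originals when the transaction closes.
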
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

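    # A minimal sketch of how a verifier might drive ``verifyintegrity``
    # (illustrative only, not part of this module). The ``state`` keys shown
    # are the ones the method reads; ``rl`` and ``ui`` are placeholders.
    #
    #     state = {b'expectedversion': 1, b'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         ui.warn(problem.warning or problem.error)
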
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

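    # Illustrative call of ``storageinfo`` (not part of this module): only the
    # keys whose flag is requested end up in the returned dictionary; the
    # values shown are made up.
    #
    #     info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #     # e.g. {b'revisionscount': 42, b'trackedsize': 123456}
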
    def rewrite_sidedata(self, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only operation).
        # See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that assumes
        # all index data is written in append mode and is therefore
        # incompatible with the seeked write done in this method. The use of
        # such "delayed" writing will soon be removed for revlog versions that
        # support sidedata, so for now, we only keep this simple assert to
        # highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'r+') as fp:
            fp.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                fp.write(packed)
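
    # A hedged sketch of how ``rewrite_sidedata`` is meant to be driven
    # (illustrative only, not part of this module). ``helpers`` is the tuple
    # documented in `revlogutil.sidedata.get_sidedata_helpers`; per the check
    # above, only its second and third slots (what to generate and what to
    # remove) are inspected here. New sidedata is appended to the data file,
    # then the affected index entries are patched in place.
    #
    #     rl.rewrite_sidedata(helpers, startrev=0, endrev=len(rl) - 1)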