##// END OF EJS Templates
revlog: rename `nodemap_file` to `_nodemap_file`...
marmoute -
r47935:04f2f948 default
parent child Browse files
Show More
@@ -1,3180 +1,3180 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
58 )
59 from .thirdparty import attr
59 from .thirdparty import attr
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 error,
63 error,
64 mdiff,
64 mdiff,
65 policy,
65 policy,
66 pycompat,
66 pycompat,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .interfaces import (
70 from .interfaces import (
71 repository,
71 repository,
72 util as interfaceutil,
72 util as interfaceutil,
73 )
73 )
74 from .revlogutils import (
74 from .revlogutils import (
75 deltas as deltautil,
75 deltas as deltautil,
76 flagutil,
76 flagutil,
77 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
87 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
88
88
89 REVLOGV0
89 REVLOGV0
90 REVLOGV1
90 REVLOGV1
91 REVLOGV2
91 REVLOGV2
92 FLAG_INLINE_DATA
92 FLAG_INLINE_DATA
93 FLAG_GENERALDELTA
93 FLAG_GENERALDELTA
94 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FLAGS
95 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_FORMAT
96 REVLOG_DEFAULT_VERSION
96 REVLOG_DEFAULT_VERSION
97 REVLOGV1_FLAGS
97 REVLOGV1_FLAGS
98 REVLOGV2_FLAGS
98 REVLOGV2_FLAGS
99 REVIDX_ISCENSORED
99 REVIDX_ISCENSORED
100 REVIDX_ELLIPSIS
100 REVIDX_ELLIPSIS
101 REVIDX_HASCOPIESINFO
101 REVIDX_HASCOPIESINFO
102 REVIDX_EXTSTORED
102 REVIDX_EXTSTORED
103 REVIDX_DEFAULT_FLAGS
103 REVIDX_DEFAULT_FLAGS
104 REVIDX_FLAGS_ORDER
104 REVIDX_FLAGS_ORDER
105 REVIDX_RAWTEXT_CHANGING_FLAGS
105 REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
107 parsers = policy.importmod('parsers')
107 parsers = policy.importmod('parsers')
108 rustancestor = policy.importrust('ancestor')
108 rustancestor = policy.importrust('ancestor')
109 rustdagop = policy.importrust('dagop')
109 rustdagop = policy.importrust('dagop')
110 rustrevlog = policy.importrust('revlog')
110 rustrevlog = policy.importrust('revlog')
111
111
112 # Aliased for performance.
112 # Aliased for performance.
113 _zlibdecompress = zlib.decompress
113 _zlibdecompress = zlib.decompress
114
114
115 # max size of revlog with inline data
115 # max size of revlog with inline data
116 _maxinline = 131072
116 _maxinline = 131072
117 _chunksize = 1048576
117 _chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
120 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
121 return text, False
121 return text, False
122
122
123
123
124 def ellipsiswriteprocessor(rl, text):
124 def ellipsiswriteprocessor(rl, text):
125 return text, False
125 return text, False
126
126
127
127
128 def ellipsisrawprocessor(rl, text):
128 def ellipsisrawprocessor(rl, text):
129 return False
129 return False
130
130
131
131
132 ellipsisprocessor = (
132 ellipsisprocessor = (
133 ellipsisreadprocessor,
133 ellipsisreadprocessor,
134 ellipsiswriteprocessor,
134 ellipsiswriteprocessor,
135 ellipsisrawprocessor,
135 ellipsisrawprocessor,
136 )
136 )
137
137
138
138
139 def offset_type(offset, type):
139 def offset_type(offset, type):
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 raise ValueError(b'unknown revlog index flags')
141 raise ValueError(b'unknown revlog index flags')
142 return int(int(offset) << 16 | type)
142 return int(int(offset) << 16 | type)
143
143
144
144
145 def _verify_revision(rl, skipflags, state, node):
145 def _verify_revision(rl, skipflags, state, node):
146 """Verify the integrity of the given revlog ``node`` while providing a hook
146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 point for extensions to influence the operation."""
147 point for extensions to influence the operation."""
148 if skipflags:
148 if skipflags:
149 state[b'skipread'].add(node)
149 state[b'skipread'].add(node)
150 else:
150 else:
151 # Side-effect: read content and verify hash.
151 # Side-effect: read content and verify hash.
152 rl.revision(node)
152 rl.revision(node)
153
153
154
154
155 # True if a fast implementation for persistent-nodemap is available
155 # True if a fast implementation for persistent-nodemap is available
156 #
156 #
157 # We also consider we have a "fast" implementation in "pure" python because
157 # We also consider we have a "fast" implementation in "pure" python because
158 # people using pure don't really have performance consideration (and a
158 # people using pure don't really have performance consideration (and a
159 # wheelbarrow of other slowness source)
159 # wheelbarrow of other slowness source)
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 parsers, 'BaseIndexObject'
161 parsers, 'BaseIndexObject'
162 )
162 )
163
163
164
164
165 @attr.s(slots=True, frozen=True)
165 @attr.s(slots=True, frozen=True)
166 class _revisioninfo(object):
166 class _revisioninfo(object):
167 """Information about a revision that allows building its fulltext
167 """Information about a revision that allows building its fulltext
168 node: expected hash of the revision
168 node: expected hash of the revision
169 p1, p2: parent revs of the revision
169 p1, p2: parent revs of the revision
170 btext: built text cache consisting of a one-element list
170 btext: built text cache consisting of a one-element list
171 cachedelta: (baserev, uncompressed_delta) or None
171 cachedelta: (baserev, uncompressed_delta) or None
172 flags: flags associated to the revision storage
172 flags: flags associated to the revision storage
173
173
174 One of btext[0] or cachedelta must be set.
174 One of btext[0] or cachedelta must be set.
175 """
175 """
176
176
177 node = attr.ib()
177 node = attr.ib()
178 p1 = attr.ib()
178 p1 = attr.ib()
179 p2 = attr.ib()
179 p2 = attr.ib()
180 btext = attr.ib()
180 btext = attr.ib()
181 textlen = attr.ib()
181 textlen = attr.ib()
182 cachedelta = attr.ib()
182 cachedelta = attr.ib()
183 flags = attr.ib()
183 flags = attr.ib()
184
184
185
185
186 @interfaceutil.implementer(repository.irevisiondelta)
186 @interfaceutil.implementer(repository.irevisiondelta)
187 @attr.s(slots=True)
187 @attr.s(slots=True)
188 class revlogrevisiondelta(object):
188 class revlogrevisiondelta(object):
189 node = attr.ib()
189 node = attr.ib()
190 p1node = attr.ib()
190 p1node = attr.ib()
191 p2node = attr.ib()
191 p2node = attr.ib()
192 basenode = attr.ib()
192 basenode = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194 baserevisionsize = attr.ib()
194 baserevisionsize = attr.ib()
195 revision = attr.ib()
195 revision = attr.ib()
196 delta = attr.ib()
196 delta = attr.ib()
197 sidedata = attr.ib()
197 sidedata = attr.ib()
198 protocol_flags = attr.ib()
198 protocol_flags = attr.ib()
199 linknode = attr.ib(default=None)
199 linknode = attr.ib(default=None)
200
200
201
201
202 @interfaceutil.implementer(repository.iverifyproblem)
202 @interfaceutil.implementer(repository.iverifyproblem)
203 @attr.s(frozen=True)
203 @attr.s(frozen=True)
204 class revlogproblem(object):
204 class revlogproblem(object):
205 warning = attr.ib(default=None)
205 warning = attr.ib(default=None)
206 error = attr.ib(default=None)
206 error = attr.ib(default=None)
207 node = attr.ib(default=None)
207 node = attr.ib(default=None)
208
208
209
209
210 def parse_index_v1(data, inline):
210 def parse_index_v1(data, inline):
211 # call the C implementation to parse the index data
211 # call the C implementation to parse the index data
212 index, cache = parsers.parse_index2(data, inline)
212 index, cache = parsers.parse_index2(data, inline)
213 return index, cache
213 return index, cache
214
214
215
215
216 def parse_index_v2(data, inline):
216 def parse_index_v2(data, inline):
217 # call the C implementation to parse the index data
217 # call the C implementation to parse the index data
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 return index, cache
219 return index, cache
220
220
221
221
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223
223
224 def parse_index_v1_nodemap(data, inline):
224 def parse_index_v1_nodemap(data, inline):
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 return index, cache
226 return index, cache
227
227
228
228
229 else:
229 else:
230 parse_index_v1_nodemap = None
230 parse_index_v1_nodemap = None
231
231
232
232
233 def parse_index_v1_mixed(data, inline):
233 def parse_index_v1_mixed(data, inline):
234 index, cache = parse_index_v1(data, inline)
234 index, cache = parse_index_v1(data, inline)
235 return rustrevlog.MixedIndex(index), cache
235 return rustrevlog.MixedIndex(index), cache
236
236
237
237
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 # signed integer)
239 # signed integer)
240 _maxentrysize = 0x7FFFFFFF
240 _maxentrysize = 0x7FFFFFFF
241
241
242
242
243 class revlog(object):
243 class revlog(object):
244 """
244 """
245 the underlying revision storage object
245 the underlying revision storage object
246
246
247 A revlog consists of two parts, an index and the revision data.
247 A revlog consists of two parts, an index and the revision data.
248
248
249 The index is a file with a fixed record size containing
249 The index is a file with a fixed record size containing
250 information on each revision, including its nodeid (hash), the
250 information on each revision, including its nodeid (hash), the
251 nodeids of its parents, the position and offset of its data within
251 nodeids of its parents, the position and offset of its data within
252 the data file, and the revision it's based on. Finally, each entry
252 the data file, and the revision it's based on. Finally, each entry
253 contains a linkrev entry that can serve as a pointer to external
253 contains a linkrev entry that can serve as a pointer to external
254 data.
254 data.
255
255
256 The revision data itself is a linear collection of data chunks.
256 The revision data itself is a linear collection of data chunks.
257 Each chunk represents a revision and is usually represented as a
257 Each chunk represents a revision and is usually represented as a
258 delta against the previous chunk. To bound lookup time, runs of
258 delta against the previous chunk. To bound lookup time, runs of
259 deltas are limited to about 2 times the length of the original
259 deltas are limited to about 2 times the length of the original
260 version data. This makes retrieval of a version proportional to
260 version data. This makes retrieval of a version proportional to
261 its size, or O(1) relative to the number of revisions.
261 its size, or O(1) relative to the number of revisions.
262
262
263 Both pieces of the revlog are written to in an append-only
263 Both pieces of the revlog are written to in an append-only
264 fashion, which means we never need to rewrite a file to insert or
264 fashion, which means we never need to rewrite a file to insert or
265 remove data, and can use some simple techniques to avoid the need
265 remove data, and can use some simple techniques to avoid the need
266 for locking while reading.
266 for locking while reading.
267
267
268 If checkambig, indexfile is opened with checkambig=True at
268 If checkambig, indexfile is opened with checkambig=True at
269 writing, to avoid file stat ambiguity.
269 writing, to avoid file stat ambiguity.
270
270
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 index will be mmapped rather than read if it is larger than the
272 index will be mmapped rather than read if it is larger than the
273 configured threshold.
273 configured threshold.
274
274
275 If censorable is True, the revlog can have censored revisions.
275 If censorable is True, the revlog can have censored revisions.
276
276
277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 compression for the data content.
278 compression for the data content.
279
279
280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 file handle, a filename, and an expected position. It should check whether
281 file handle, a filename, and an expected position. It should check whether
282 the current position in the file handle is valid, and log/warn/fail (by
282 the current position in the file handle is valid, and log/warn/fail (by
283 raising).
283 raising).
284 """
284 """
285
285
286 _flagserrorclass = error.RevlogError
286 _flagserrorclass = error.RevlogError
287
287
288 def __init__(
288 def __init__(
289 self,
289 self,
290 opener,
290 opener,
291 target,
291 target,
292 radix,
292 radix,
293 postfix=None,
293 postfix=None,
294 checkambig=False,
294 checkambig=False,
295 mmaplargeindex=False,
295 mmaplargeindex=False,
296 censorable=False,
296 censorable=False,
297 upperboundcomp=None,
297 upperboundcomp=None,
298 persistentnodemap=False,
298 persistentnodemap=False,
299 concurrencychecker=None,
299 concurrencychecker=None,
300 ):
300 ):
301 """
301 """
302 create a revlog object
302 create a revlog object
303
303
304 opener is a function that abstracts the file opening operation
304 opener is a function that abstracts the file opening operation
305 and can be used to implement COW semantics or the like.
305 and can be used to implement COW semantics or the like.
306
306
307 `target`: a (KIND, ID) tuple that identify the content stored in
307 `target`: a (KIND, ID) tuple that identify the content stored in
308 this revlog. It help the rest of the code to understand what the revlog
308 this revlog. It help the rest of the code to understand what the revlog
309 is about without having to resort to heuristic and index filename
309 is about without having to resort to heuristic and index filename
310 analysis. Note: that this must be reliably be set by normal code, but
310 analysis. Note: that this must be reliably be set by normal code, but
311 that test, debug, or performance measurement code might not set this to
311 that test, debug, or performance measurement code might not set this to
312 accurate value.
312 accurate value.
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315
315
316 self.radix = radix
316 self.radix = radix
317
317
318 if postfix is None:
318 if postfix is None:
319 indexfile = b'%s.i' % self.radix
319 indexfile = b'%s.i' % self.radix
320 datafile = b'%s.d' % self.radix
320 datafile = b'%s.d' % self.radix
321 elif postfix == b'a':
321 elif postfix == b'a':
322 indexfile = b'%s.i.a' % self.radix
322 indexfile = b'%s.i.a' % self.radix
323 datafile = b'%s.d' % self.radix
323 datafile = b'%s.d' % self.radix
324 else:
324 else:
325 indexfile = b'%s.i.%s' % (self.radix, postfix)
325 indexfile = b'%s.i.%s' % (self.radix, postfix)
326 datafile = b'%s.d.%s' % (self.radix, postfix)
326 datafile = b'%s.d.%s' % (self.radix, postfix)
327
327
328 self._indexfile = indexfile
328 self._indexfile = indexfile
329 self._datafile = datafile
329 self._datafile = datafile
330 self.nodemap_file = None
330 self._nodemap_file = None
331 self.postfix = postfix
331 self.postfix = postfix
332 self.opener = opener
332 self.opener = opener
333 if persistentnodemap:
333 if persistentnodemap:
334 self.nodemap_file = nodemaputil.get_nodemap_file(self)
334 self._nodemap_file = nodemaputil.get_nodemap_file(self)
335
335
336 assert target[0] in ALL_KINDS
336 assert target[0] in ALL_KINDS
337 assert len(target) == 2
337 assert len(target) == 2
338 self.target = target
338 self.target = target
339 # When True, indexfile is opened with checkambig=True at writing, to
339 # When True, indexfile is opened with checkambig=True at writing, to
340 # avoid file stat ambiguity.
340 # avoid file stat ambiguity.
341 self._checkambig = checkambig
341 self._checkambig = checkambig
342 self._mmaplargeindex = mmaplargeindex
342 self._mmaplargeindex = mmaplargeindex
343 self._censorable = censorable
343 self._censorable = censorable
344 # 3-tuple of (node, rev, text) for a raw revision.
344 # 3-tuple of (node, rev, text) for a raw revision.
345 self._revisioncache = None
345 self._revisioncache = None
346 # Maps rev to chain base rev.
346 # Maps rev to chain base rev.
347 self._chainbasecache = util.lrucachedict(100)
347 self._chainbasecache = util.lrucachedict(100)
348 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
349 self._chunkcache = (0, b'')
349 self._chunkcache = (0, b'')
350 # How much data to read and cache into the raw revlog data cache.
350 # How much data to read and cache into the raw revlog data cache.
351 self._chunkcachesize = 65536
351 self._chunkcachesize = 65536
352 self._maxchainlen = None
352 self._maxchainlen = None
353 self._deltabothparents = True
353 self._deltabothparents = True
354 self.index = None
354 self.index = None
355 self._nodemap_docket = None
355 self._nodemap_docket = None
356 # Mapping of partial identifiers to full nodes.
356 # Mapping of partial identifiers to full nodes.
357 self._pcache = {}
357 self._pcache = {}
358 # Mapping of revision integer to full node.
358 # Mapping of revision integer to full node.
359 self._compengine = b'zlib'
359 self._compengine = b'zlib'
360 self._compengineopts = {}
360 self._compengineopts = {}
361 self._maxdeltachainspan = -1
361 self._maxdeltachainspan = -1
362 self._withsparseread = False
362 self._withsparseread = False
363 self._sparserevlog = False
363 self._sparserevlog = False
364 self._srdensitythreshold = 0.50
364 self._srdensitythreshold = 0.50
365 self._srmingapsize = 262144
365 self._srmingapsize = 262144
366
366
367 # Make copy of flag processors so each revlog instance can support
367 # Make copy of flag processors so each revlog instance can support
368 # custom flags.
368 # custom flags.
369 self._flagprocessors = dict(flagutil.flagprocessors)
369 self._flagprocessors = dict(flagutil.flagprocessors)
370
370
371 # 2-tuple of file handles being used for active writing.
371 # 2-tuple of file handles being used for active writing.
372 self._writinghandles = None
372 self._writinghandles = None
373
373
374 self._loadindex()
374 self._loadindex()
375
375
376 self._concurrencychecker = concurrencychecker
376 self._concurrencychecker = concurrencychecker
377
377
378 def _init_opts(self):
378 def _init_opts(self):
379 """process options (from above/config) to setup associated default revlog mode
379 """process options (from above/config) to setup associated default revlog mode
380
380
381 These values might be affected when actually reading on disk information.
381 These values might be affected when actually reading on disk information.
382
382
383 The relevant values are returned for use in _loadindex().
383 The relevant values are returned for use in _loadindex().
384
384
385 * newversionflags:
385 * newversionflags:
386 version header to use if we need to create a new revlog
386 version header to use if we need to create a new revlog
387
387
388 * mmapindexthreshold:
388 * mmapindexthreshold:
389 minimal index size for start to use mmap
389 minimal index size for start to use mmap
390
390
391 * force_nodemap:
391 * force_nodemap:
392 force the usage of a "development" version of the nodemap code
392 force the usage of a "development" version of the nodemap code
393 """
393 """
394 mmapindexthreshold = None
394 mmapindexthreshold = None
395 opts = self.opener.options
395 opts = self.opener.options
396
396
397 if b'revlogv2' in opts:
397 if b'revlogv2' in opts:
398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
399 elif b'revlogv1' in opts:
399 elif b'revlogv1' in opts:
400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
401 if b'generaldelta' in opts:
401 if b'generaldelta' in opts:
402 newversionflags |= FLAG_GENERALDELTA
402 newversionflags |= FLAG_GENERALDELTA
403 elif b'revlogv0' in self.opener.options:
403 elif b'revlogv0' in self.opener.options:
404 newversionflags = REVLOGV0
404 newversionflags = REVLOGV0
405 else:
405 else:
406 newversionflags = REVLOG_DEFAULT_VERSION
406 newversionflags = REVLOG_DEFAULT_VERSION
407
407
408 if b'chunkcachesize' in opts:
408 if b'chunkcachesize' in opts:
409 self._chunkcachesize = opts[b'chunkcachesize']
409 self._chunkcachesize = opts[b'chunkcachesize']
410 if b'maxchainlen' in opts:
410 if b'maxchainlen' in opts:
411 self._maxchainlen = opts[b'maxchainlen']
411 self._maxchainlen = opts[b'maxchainlen']
412 if b'deltabothparents' in opts:
412 if b'deltabothparents' in opts:
413 self._deltabothparents = opts[b'deltabothparents']
413 self._deltabothparents = opts[b'deltabothparents']
414 self._lazydelta = bool(opts.get(b'lazydelta', True))
414 self._lazydelta = bool(opts.get(b'lazydelta', True))
415 self._lazydeltabase = False
415 self._lazydeltabase = False
416 if self._lazydelta:
416 if self._lazydelta:
417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
418 if b'compengine' in opts:
418 if b'compengine' in opts:
419 self._compengine = opts[b'compengine']
419 self._compengine = opts[b'compengine']
420 if b'zlib.level' in opts:
420 if b'zlib.level' in opts:
421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
422 if b'zstd.level' in opts:
422 if b'zstd.level' in opts:
423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
424 if b'maxdeltachainspan' in opts:
424 if b'maxdeltachainspan' in opts:
425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
427 mmapindexthreshold = opts[b'mmapindexthreshold']
427 mmapindexthreshold = opts[b'mmapindexthreshold']
428 self.hassidedata = bool(opts.get(b'side-data', False))
428 self.hassidedata = bool(opts.get(b'side-data', False))
429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
430 withsparseread = bool(opts.get(b'with-sparse-read', False))
430 withsparseread = bool(opts.get(b'with-sparse-read', False))
431 # sparse-revlog forces sparse-read
431 # sparse-revlog forces sparse-read
432 self._withsparseread = self._sparserevlog or withsparseread
432 self._withsparseread = self._sparserevlog or withsparseread
433 if b'sparse-read-density-threshold' in opts:
433 if b'sparse-read-density-threshold' in opts:
434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
435 if b'sparse-read-min-gap-size' in opts:
435 if b'sparse-read-min-gap-size' in opts:
436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
437 if opts.get(b'enableellipsis'):
437 if opts.get(b'enableellipsis'):
438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
439
439
440 # revlog v0 doesn't have flag processors
440 # revlog v0 doesn't have flag processors
441 for flag, processor in pycompat.iteritems(
441 for flag, processor in pycompat.iteritems(
442 opts.get(b'flagprocessors', {})
442 opts.get(b'flagprocessors', {})
443 ):
443 ):
444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
445
445
446 if self._chunkcachesize <= 0:
446 if self._chunkcachesize <= 0:
447 raise error.RevlogError(
447 raise error.RevlogError(
448 _(b'revlog chunk cache size %r is not greater than 0')
448 _(b'revlog chunk cache size %r is not greater than 0')
449 % self._chunkcachesize
449 % self._chunkcachesize
450 )
450 )
451 elif self._chunkcachesize & (self._chunkcachesize - 1):
451 elif self._chunkcachesize & (self._chunkcachesize - 1):
452 raise error.RevlogError(
452 raise error.RevlogError(
453 _(b'revlog chunk cache size %r is not a power of 2')
453 _(b'revlog chunk cache size %r is not a power of 2')
454 % self._chunkcachesize
454 % self._chunkcachesize
455 )
455 )
456 force_nodemap = opts.get(b'devel-force-nodemap', False)
456 force_nodemap = opts.get(b'devel-force-nodemap', False)
457 return newversionflags, mmapindexthreshold, force_nodemap
457 return newversionflags, mmapindexthreshold, force_nodemap
458
458
459 def _loadindex(self):
459 def _loadindex(self):
460
460
461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
462 indexdata = b''
462 indexdata = b''
463 self._initempty = True
463 self._initempty = True
464 try:
464 try:
465 with self._indexfp() as f:
465 with self._indexfp() as f:
466 if (
466 if (
467 mmapindexthreshold is not None
467 mmapindexthreshold is not None
468 and self.opener.fstat(f).st_size >= mmapindexthreshold
468 and self.opener.fstat(f).st_size >= mmapindexthreshold
469 ):
469 ):
470 # TODO: should .close() to release resources without
470 # TODO: should .close() to release resources without
471 # relying on Python GC
471 # relying on Python GC
472 indexdata = util.buffer(util.mmapread(f))
472 indexdata = util.buffer(util.mmapread(f))
473 else:
473 else:
474 indexdata = f.read()
474 indexdata = f.read()
475 if len(indexdata) > 0:
475 if len(indexdata) > 0:
476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
477 self._initempty = False
477 self._initempty = False
478 else:
478 else:
479 versionflags = newversionflags
479 versionflags = newversionflags
480 except IOError as inst:
480 except IOError as inst:
481 if inst.errno != errno.ENOENT:
481 if inst.errno != errno.ENOENT:
482 raise
482 raise
483
483
484 versionflags = newversionflags
484 versionflags = newversionflags
485
485
486 flags = self._format_flags = versionflags & ~0xFFFF
486 flags = self._format_flags = versionflags & ~0xFFFF
487 fmt = self._format_version = versionflags & 0xFFFF
487 fmt = self._format_version = versionflags & 0xFFFF
488
488
489 if fmt == REVLOGV0:
489 if fmt == REVLOGV0:
490 if flags:
490 if flags:
491 raise error.RevlogError(
491 raise error.RevlogError(
492 _(b'unknown flags (%#04x) in version %d revlog %s')
492 _(b'unknown flags (%#04x) in version %d revlog %s')
493 % (flags >> 16, fmt, self.display_id)
493 % (flags >> 16, fmt, self.display_id)
494 )
494 )
495
495
496 self._inline = False
496 self._inline = False
497 self._generaldelta = False
497 self._generaldelta = False
498
498
499 elif fmt == REVLOGV1:
499 elif fmt == REVLOGV1:
500 if flags & ~REVLOGV1_FLAGS:
500 if flags & ~REVLOGV1_FLAGS:
501 raise error.RevlogError(
501 raise error.RevlogError(
502 _(b'unknown flags (%#04x) in version %d revlog %s')
502 _(b'unknown flags (%#04x) in version %d revlog %s')
503 % (flags >> 16, fmt, self.display_id)
503 % (flags >> 16, fmt, self.display_id)
504 )
504 )
505
505
506 self._inline = versionflags & FLAG_INLINE_DATA
506 self._inline = versionflags & FLAG_INLINE_DATA
507 self._generaldelta = versionflags & FLAG_GENERALDELTA
507 self._generaldelta = versionflags & FLAG_GENERALDELTA
508
508
509 elif fmt == REVLOGV2:
509 elif fmt == REVLOGV2:
510 if flags & ~REVLOGV2_FLAGS:
510 if flags & ~REVLOGV2_FLAGS:
511 raise error.RevlogError(
511 raise error.RevlogError(
512 _(b'unknown flags (%#04x) in version %d revlog %s')
512 _(b'unknown flags (%#04x) in version %d revlog %s')
513 % (flags >> 16, fmt, self.display_id)
513 % (flags >> 16, fmt, self.display_id)
514 )
514 )
515
515
516 # There is a bug in the transaction handling when going from an
516 # There is a bug in the transaction handling when going from an
517 # inline revlog to a separate index and data file. Turn it off until
517 # inline revlog to a separate index and data file. Turn it off until
518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
519 # See issue6485
519 # See issue6485
520 self._inline = False
520 self._inline = False
521 # generaldelta implied by version 2 revlogs.
521 # generaldelta implied by version 2 revlogs.
522 self._generaldelta = True
522 self._generaldelta = True
523
523
524 else:
524 else:
525 raise error.RevlogError(
525 raise error.RevlogError(
526 _(b'unknown version (%d) in revlog %s') % (fmt, self.display_id)
526 _(b'unknown version (%d) in revlog %s') % (fmt, self.display_id)
527 )
527 )
528
528
529 self.nodeconstants = sha1nodeconstants
529 self.nodeconstants = sha1nodeconstants
530 self.nullid = self.nodeconstants.nullid
530 self.nullid = self.nodeconstants.nullid
531
531
532 # sparse-revlog can't be on without general-delta (issue6056)
532 # sparse-revlog can't be on without general-delta (issue6056)
533 if not self._generaldelta:
533 if not self._generaldelta:
534 self._sparserevlog = False
534 self._sparserevlog = False
535
535
536 self._storedeltachains = True
536 self._storedeltachains = True
537
537
538 devel_nodemap = (
538 devel_nodemap = (
539 self.nodemap_file
539 self._nodemap_file
540 and force_nodemap
540 and force_nodemap
541 and parse_index_v1_nodemap is not None
541 and parse_index_v1_nodemap is not None
542 )
542 )
543
543
544 use_rust_index = False
544 use_rust_index = False
545 if rustrevlog is not None:
545 if rustrevlog is not None:
546 if self.nodemap_file is not None:
546 if self._nodemap_file is not None:
547 use_rust_index = True
547 use_rust_index = True
548 else:
548 else:
549 use_rust_index = self.opener.options.get(b'rust.index')
549 use_rust_index = self.opener.options.get(b'rust.index')
550
550
551 self._parse_index = parse_index_v1
551 self._parse_index = parse_index_v1
552 if self._format_version == REVLOGV0:
552 if self._format_version == REVLOGV0:
553 self._parse_index = revlogv0.parse_index_v0
553 self._parse_index = revlogv0.parse_index_v0
554 elif fmt == REVLOGV2:
554 elif fmt == REVLOGV2:
555 self._parse_index = parse_index_v2
555 self._parse_index = parse_index_v2
556 elif devel_nodemap:
556 elif devel_nodemap:
557 self._parse_index = parse_index_v1_nodemap
557 self._parse_index = parse_index_v1_nodemap
558 elif use_rust_index:
558 elif use_rust_index:
559 self._parse_index = parse_index_v1_mixed
559 self._parse_index = parse_index_v1_mixed
560 try:
560 try:
561 d = self._parse_index(indexdata, self._inline)
561 d = self._parse_index(indexdata, self._inline)
562 index, _chunkcache = d
562 index, _chunkcache = d
563 use_nodemap = (
563 use_nodemap = (
564 not self._inline
564 not self._inline
565 and self.nodemap_file is not None
565 and self._nodemap_file is not None
566 and util.safehasattr(index, 'update_nodemap_data')
566 and util.safehasattr(index, 'update_nodemap_data')
567 )
567 )
568 if use_nodemap:
568 if use_nodemap:
569 nodemap_data = nodemaputil.persisted_data(self)
569 nodemap_data = nodemaputil.persisted_data(self)
570 if nodemap_data is not None:
570 if nodemap_data is not None:
571 docket = nodemap_data[0]
571 docket = nodemap_data[0]
572 if (
572 if (
573 len(d[0]) > docket.tip_rev
573 len(d[0]) > docket.tip_rev
574 and d[0][docket.tip_rev][7] == docket.tip_node
574 and d[0][docket.tip_rev][7] == docket.tip_node
575 ):
575 ):
576 # no changelog tampering
576 # no changelog tampering
577 self._nodemap_docket = docket
577 self._nodemap_docket = docket
578 index.update_nodemap_data(*nodemap_data)
578 index.update_nodemap_data(*nodemap_data)
579 except (ValueError, IndexError):
579 except (ValueError, IndexError):
580 raise error.RevlogError(
580 raise error.RevlogError(
581 _(b"index %s is corrupted") % self.display_id
581 _(b"index %s is corrupted") % self.display_id
582 )
582 )
583 self.index, self._chunkcache = d
583 self.index, self._chunkcache = d
584 if not self._chunkcache:
584 if not self._chunkcache:
585 self._chunkclear()
585 self._chunkclear()
586 # revnum -> (chain-length, sum-delta-length)
586 # revnum -> (chain-length, sum-delta-length)
587 self._chaininfocache = util.lrucachedict(500)
587 self._chaininfocache = util.lrucachedict(500)
588 # revlog header -> revlog compressor
588 # revlog header -> revlog compressor
589 self._decompressors = {}
589 self._decompressors = {}
590
590
591 @util.propertycache
591 @util.propertycache
592 def revlog_kind(self):
592 def revlog_kind(self):
593 return self.target[0]
593 return self.target[0]
594
594
595 @util.propertycache
595 @util.propertycache
596 def display_id(self):
596 def display_id(self):
597 """The public facing "ID" of the revlog that we use in message"""
597 """The public facing "ID" of the revlog that we use in message"""
598 # Maybe we should build a user facing representation of
598 # Maybe we should build a user facing representation of
599 # revlog.target instead of using `self.radix`
599 # revlog.target instead of using `self.radix`
600 return self.radix
600 return self.radix
601
601
602 @util.propertycache
602 @util.propertycache
603 def _compressor(self):
603 def _compressor(self):
604 engine = util.compengines[self._compengine]
604 engine = util.compengines[self._compengine]
605 return engine.revlogcompressor(self._compengineopts)
605 return engine.revlogcompressor(self._compengineopts)
606
606
607 def _indexfp(self, mode=b'r'):
607 def _indexfp(self, mode=b'r'):
608 """file object for the revlog's index file"""
608 """file object for the revlog's index file"""
609 args = {'mode': mode}
609 args = {'mode': mode}
610 if mode != b'r':
610 if mode != b'r':
611 args['checkambig'] = self._checkambig
611 args['checkambig'] = self._checkambig
612 if mode == b'w':
612 if mode == b'w':
613 args['atomictemp'] = True
613 args['atomictemp'] = True
614 return self.opener(self._indexfile, **args)
614 return self.opener(self._indexfile, **args)
615
615
616 def _datafp(self, mode=b'r'):
616 def _datafp(self, mode=b'r'):
617 """file object for the revlog's data file"""
617 """file object for the revlog's data file"""
618 return self.opener(self._datafile, mode=mode)
618 return self.opener(self._datafile, mode=mode)
619
619
620 @contextlib.contextmanager
620 @contextlib.contextmanager
621 def _datareadfp(self, existingfp=None):
621 def _datareadfp(self, existingfp=None):
622 """file object suitable to read data"""
622 """file object suitable to read data"""
623 # Use explicit file handle, if given.
623 # Use explicit file handle, if given.
624 if existingfp is not None:
624 if existingfp is not None:
625 yield existingfp
625 yield existingfp
626
626
627 # Use a file handle being actively used for writes, if available.
627 # Use a file handle being actively used for writes, if available.
628 # There is some danger to doing this because reads will seek the
628 # There is some danger to doing this because reads will seek the
629 # file. However, _writeentry() performs a SEEK_END before all writes,
629 # file. However, _writeentry() performs a SEEK_END before all writes,
630 # so we should be safe.
630 # so we should be safe.
631 elif self._writinghandles:
631 elif self._writinghandles:
632 if self._inline:
632 if self._inline:
633 yield self._writinghandles[0]
633 yield self._writinghandles[0]
634 else:
634 else:
635 yield self._writinghandles[1]
635 yield self._writinghandles[1]
636
636
637 # Otherwise open a new file handle.
637 # Otherwise open a new file handle.
638 else:
638 else:
639 if self._inline:
639 if self._inline:
640 func = self._indexfp
640 func = self._indexfp
641 else:
641 else:
642 func = self._datafp
642 func = self._datafp
643 with func() as fp:
643 with func() as fp:
644 yield fp
644 yield fp
645
645
646 def tiprev(self):
646 def tiprev(self):
647 return len(self.index) - 1
647 return len(self.index) - 1
648
648
649 def tip(self):
649 def tip(self):
650 return self.node(self.tiprev())
650 return self.node(self.tiprev())
651
651
652 def __contains__(self, rev):
652 def __contains__(self, rev):
653 return 0 <= rev < len(self)
653 return 0 <= rev < len(self)
654
654
655 def __len__(self):
655 def __len__(self):
656 return len(self.index)
656 return len(self.index)
657
657
658 def __iter__(self):
658 def __iter__(self):
659 return iter(pycompat.xrange(len(self)))
659 return iter(pycompat.xrange(len(self)))
660
660
661 def revs(self, start=0, stop=None):
661 def revs(self, start=0, stop=None):
662 """iterate over all rev in this revlog (from start to stop)"""
662 """iterate over all rev in this revlog (from start to stop)"""
663 return storageutil.iterrevs(len(self), start=start, stop=stop)
663 return storageutil.iterrevs(len(self), start=start, stop=stop)
664
664
665 @property
665 @property
666 def nodemap(self):
666 def nodemap(self):
667 msg = (
667 msg = (
668 b"revlog.nodemap is deprecated, "
668 b"revlog.nodemap is deprecated, "
669 b"use revlog.index.[has_node|rev|get_rev]"
669 b"use revlog.index.[has_node|rev|get_rev]"
670 )
670 )
671 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
671 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
672 return self.index.nodemap
672 return self.index.nodemap
673
673
674 @property
674 @property
675 def _nodecache(self):
675 def _nodecache(self):
676 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
676 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
677 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
677 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
678 return self.index.nodemap
678 return self.index.nodemap
679
679
680 def hasnode(self, node):
680 def hasnode(self, node):
681 try:
681 try:
682 self.rev(node)
682 self.rev(node)
683 return True
683 return True
684 except KeyError:
684 except KeyError:
685 return False
685 return False
686
686
687 def candelta(self, baserev, rev):
687 def candelta(self, baserev, rev):
688 """whether two revisions (baserev, rev) can be delta-ed or not"""
688 """whether two revisions (baserev, rev) can be delta-ed or not"""
689 # Disable delta if either rev requires a content-changing flag
689 # Disable delta if either rev requires a content-changing flag
690 # processor (ex. LFS). This is because such flag processor can alter
690 # processor (ex. LFS). This is because such flag processor can alter
691 # the rawtext content that the delta will be based on, and two clients
691 # the rawtext content that the delta will be based on, and two clients
692 # could have a same revlog node with different flags (i.e. different
692 # could have a same revlog node with different flags (i.e. different
693 # rawtext contents) and the delta could be incompatible.
693 # rawtext contents) and the delta could be incompatible.
694 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
694 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
695 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
695 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
696 ):
696 ):
697 return False
697 return False
698 return True
698 return True
699
699
700 def update_caches(self, transaction):
700 def update_caches(self, transaction):
701 if self.nodemap_file is not None:
701 if self._nodemap_file is not None:
702 if transaction is None:
702 if transaction is None:
703 nodemaputil.update_persistent_nodemap(self)
703 nodemaputil.update_persistent_nodemap(self)
704 else:
704 else:
705 nodemaputil.setup_persistent_nodemap(transaction, self)
705 nodemaputil.setup_persistent_nodemap(transaction, self)
706
706
707 def clearcaches(self):
707 def clearcaches(self):
708 self._revisioncache = None
708 self._revisioncache = None
709 self._chainbasecache.clear()
709 self._chainbasecache.clear()
710 self._chunkcache = (0, b'')
710 self._chunkcache = (0, b'')
711 self._pcache = {}
711 self._pcache = {}
712 self._nodemap_docket = None
712 self._nodemap_docket = None
713 self.index.clearcaches()
713 self.index.clearcaches()
714 # The python code is the one responsible for validating the docket, we
714 # The python code is the one responsible for validating the docket, we
715 # end up having to refresh it here.
715 # end up having to refresh it here.
716 use_nodemap = (
716 use_nodemap = (
717 not self._inline
717 not self._inline
718 and self.nodemap_file is not None
718 and self._nodemap_file is not None
719 and util.safehasattr(self.index, 'update_nodemap_data')
719 and util.safehasattr(self.index, 'update_nodemap_data')
720 )
720 )
721 if use_nodemap:
721 if use_nodemap:
722 nodemap_data = nodemaputil.persisted_data(self)
722 nodemap_data = nodemaputil.persisted_data(self)
723 if nodemap_data is not None:
723 if nodemap_data is not None:
724 self._nodemap_docket = nodemap_data[0]
724 self._nodemap_docket = nodemap_data[0]
725 self.index.update_nodemap_data(*nodemap_data)
725 self.index.update_nodemap_data(*nodemap_data)
726
726
727 def rev(self, node):
727 def rev(self, node):
728 try:
728 try:
729 return self.index.rev(node)
729 return self.index.rev(node)
730 except TypeError:
730 except TypeError:
731 raise
731 raise
732 except error.RevlogError:
732 except error.RevlogError:
733 # parsers.c radix tree lookup failed
733 # parsers.c radix tree lookup failed
734 if (
734 if (
735 node == self.nodeconstants.wdirid
735 node == self.nodeconstants.wdirid
736 or node in self.nodeconstants.wdirfilenodeids
736 or node in self.nodeconstants.wdirfilenodeids
737 ):
737 ):
738 raise error.WdirUnsupported
738 raise error.WdirUnsupported
739 raise error.LookupError(node, self.display_id, _(b'no node'))
739 raise error.LookupError(node, self.display_id, _(b'no node'))
740
740
741 # Accessors for index entries.
741 # Accessors for index entries.
742
742
743 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
743 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
744 # are flags.
744 # are flags.
745 def start(self, rev):
745 def start(self, rev):
746 return int(self.index[rev][0] >> 16)
746 return int(self.index[rev][0] >> 16)
747
747
748 def flags(self, rev):
748 def flags(self, rev):
749 return self.index[rev][0] & 0xFFFF
749 return self.index[rev][0] & 0xFFFF
750
750
751 def length(self, rev):
751 def length(self, rev):
752 return self.index[rev][1]
752 return self.index[rev][1]
753
753
754 def sidedata_length(self, rev):
754 def sidedata_length(self, rev):
755 if not self.hassidedata:
755 if not self.hassidedata:
756 return 0
756 return 0
757 return self.index[rev][9]
757 return self.index[rev][9]
758
758
759 def rawsize(self, rev):
759 def rawsize(self, rev):
760 """return the length of the uncompressed text for a given revision"""
760 """return the length of the uncompressed text for a given revision"""
761 l = self.index[rev][2]
761 l = self.index[rev][2]
762 if l >= 0:
762 if l >= 0:
763 return l
763 return l
764
764
765 t = self.rawdata(rev)
765 t = self.rawdata(rev)
766 return len(t)
766 return len(t)
767
767
768 def size(self, rev):
768 def size(self, rev):
769 """length of non-raw text (processed by a "read" flag processor)"""
769 """length of non-raw text (processed by a "read" flag processor)"""
770 # fast path: if no "read" flag processor could change the content,
770 # fast path: if no "read" flag processor could change the content,
771 # size is rawsize. note: ELLIPSIS is known to not change the content.
771 # size is rawsize. note: ELLIPSIS is known to not change the content.
772 flags = self.flags(rev)
772 flags = self.flags(rev)
773 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
773 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
774 return self.rawsize(rev)
774 return self.rawsize(rev)
775
775
776 return len(self.revision(rev, raw=False))
776 return len(self.revision(rev, raw=False))
777
777
778 def chainbase(self, rev):
778 def chainbase(self, rev):
779 base = self._chainbasecache.get(rev)
779 base = self._chainbasecache.get(rev)
780 if base is not None:
780 if base is not None:
781 return base
781 return base
782
782
783 index = self.index
783 index = self.index
784 iterrev = rev
784 iterrev = rev
785 base = index[iterrev][3]
785 base = index[iterrev][3]
786 while base != iterrev:
786 while base != iterrev:
787 iterrev = base
787 iterrev = base
788 base = index[iterrev][3]
788 base = index[iterrev][3]
789
789
790 self._chainbasecache[rev] = base
790 self._chainbasecache[rev] = base
791 return base
791 return base
792
792
793 def linkrev(self, rev):
793 def linkrev(self, rev):
794 return self.index[rev][4]
794 return self.index[rev][4]
795
795
796 def parentrevs(self, rev):
796 def parentrevs(self, rev):
797 try:
797 try:
798 entry = self.index[rev]
798 entry = self.index[rev]
799 except IndexError:
799 except IndexError:
800 if rev == wdirrev:
800 if rev == wdirrev:
801 raise error.WdirUnsupported
801 raise error.WdirUnsupported
802 raise
802 raise
803 if entry[5] == nullrev:
803 if entry[5] == nullrev:
804 return entry[6], entry[5]
804 return entry[6], entry[5]
805 else:
805 else:
806 return entry[5], entry[6]
806 return entry[5], entry[6]
807
807
808 # fast parentrevs(rev) where rev isn't filtered
808 # fast parentrevs(rev) where rev isn't filtered
809 _uncheckedparentrevs = parentrevs
809 _uncheckedparentrevs = parentrevs
810
810
811 def node(self, rev):
811 def node(self, rev):
812 try:
812 try:
813 return self.index[rev][7]
813 return self.index[rev][7]
814 except IndexError:
814 except IndexError:
815 if rev == wdirrev:
815 if rev == wdirrev:
816 raise error.WdirUnsupported
816 raise error.WdirUnsupported
817 raise
817 raise
818
818
819 # Derived from index values.
819 # Derived from index values.
820
820
821 def end(self, rev):
821 def end(self, rev):
822 return self.start(rev) + self.length(rev)
822 return self.start(rev) + self.length(rev)
823
823
824 def parents(self, node):
824 def parents(self, node):
825 i = self.index
825 i = self.index
826 d = i[self.rev(node)]
826 d = i[self.rev(node)]
827 # inline node() to avoid function call overhead
827 # inline node() to avoid function call overhead
828 if d[5] == self.nullid:
828 if d[5] == self.nullid:
829 return i[d[6]][7], i[d[5]][7]
829 return i[d[6]][7], i[d[5]][7]
830 else:
830 else:
831 return i[d[5]][7], i[d[6]][7]
831 return i[d[5]][7], i[d[6]][7]
832
832
833 def chainlen(self, rev):
833 def chainlen(self, rev):
834 return self._chaininfo(rev)[0]
834 return self._chaininfo(rev)[0]
835
835
836 def _chaininfo(self, rev):
836 def _chaininfo(self, rev):
837 chaininfocache = self._chaininfocache
837 chaininfocache = self._chaininfocache
838 if rev in chaininfocache:
838 if rev in chaininfocache:
839 return chaininfocache[rev]
839 return chaininfocache[rev]
840 index = self.index
840 index = self.index
841 generaldelta = self._generaldelta
841 generaldelta = self._generaldelta
842 iterrev = rev
842 iterrev = rev
843 e = index[iterrev]
843 e = index[iterrev]
844 clen = 0
844 clen = 0
845 compresseddeltalen = 0
845 compresseddeltalen = 0
846 while iterrev != e[3]:
846 while iterrev != e[3]:
847 clen += 1
847 clen += 1
848 compresseddeltalen += e[1]
848 compresseddeltalen += e[1]
849 if generaldelta:
849 if generaldelta:
850 iterrev = e[3]
850 iterrev = e[3]
851 else:
851 else:
852 iterrev -= 1
852 iterrev -= 1
853 if iterrev in chaininfocache:
853 if iterrev in chaininfocache:
854 t = chaininfocache[iterrev]
854 t = chaininfocache[iterrev]
855 clen += t[0]
855 clen += t[0]
856 compresseddeltalen += t[1]
856 compresseddeltalen += t[1]
857 break
857 break
858 e = index[iterrev]
858 e = index[iterrev]
859 else:
859 else:
860 # Add text length of base since decompressing that also takes
860 # Add text length of base since decompressing that also takes
861 # work. For cache hits the length is already included.
861 # work. For cache hits the length is already included.
862 compresseddeltalen += e[1]
862 compresseddeltalen += e[1]
863 r = (clen, compresseddeltalen)
863 r = (clen, compresseddeltalen)
864 chaininfocache[rev] = r
864 chaininfocache[rev] = r
865 return r
865 return r
866
866
867 def _deltachain(self, rev, stoprev=None):
867 def _deltachain(self, rev, stoprev=None):
868 """Obtain the delta chain for a revision.
868 """Obtain the delta chain for a revision.
869
869
870 ``stoprev`` specifies a revision to stop at. If not specified, we
870 ``stoprev`` specifies a revision to stop at. If not specified, we
871 stop at the base of the chain.
871 stop at the base of the chain.
872
872
873 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
873 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
874 revs in ascending order and ``stopped`` is a bool indicating whether
874 revs in ascending order and ``stopped`` is a bool indicating whether
875 ``stoprev`` was hit.
875 ``stoprev`` was hit.
876 """
876 """
877 # Try C implementation.
877 # Try C implementation.
878 try:
878 try:
879 return self.index.deltachain(rev, stoprev, self._generaldelta)
879 return self.index.deltachain(rev, stoprev, self._generaldelta)
880 except AttributeError:
880 except AttributeError:
881 pass
881 pass
882
882
883 chain = []
883 chain = []
884
884
885 # Alias to prevent attribute lookup in tight loop.
885 # Alias to prevent attribute lookup in tight loop.
886 index = self.index
886 index = self.index
887 generaldelta = self._generaldelta
887 generaldelta = self._generaldelta
888
888
889 iterrev = rev
889 iterrev = rev
890 e = index[iterrev]
890 e = index[iterrev]
891 while iterrev != e[3] and iterrev != stoprev:
891 while iterrev != e[3] and iterrev != stoprev:
892 chain.append(iterrev)
892 chain.append(iterrev)
893 if generaldelta:
893 if generaldelta:
894 iterrev = e[3]
894 iterrev = e[3]
895 else:
895 else:
896 iterrev -= 1
896 iterrev -= 1
897 e = index[iterrev]
897 e = index[iterrev]
898
898
899 if iterrev == stoprev:
899 if iterrev == stoprev:
900 stopped = True
900 stopped = True
901 else:
901 else:
902 chain.append(iterrev)
902 chain.append(iterrev)
903 stopped = False
903 stopped = False
904
904
905 chain.reverse()
905 chain.reverse()
906 return chain, stopped
906 return chain, stopped
907
907
908 def ancestors(self, revs, stoprev=0, inclusive=False):
908 def ancestors(self, revs, stoprev=0, inclusive=False):
909 """Generate the ancestors of 'revs' in reverse revision order.
909 """Generate the ancestors of 'revs' in reverse revision order.
910 Does not generate revs lower than stoprev.
910 Does not generate revs lower than stoprev.
911
911
912 See the documentation for ancestor.lazyancestors for more details."""
912 See the documentation for ancestor.lazyancestors for more details."""
913
913
914 # first, make sure start revisions aren't filtered
914 # first, make sure start revisions aren't filtered
915 revs = list(revs)
915 revs = list(revs)
916 checkrev = self.node
916 checkrev = self.node
917 for r in revs:
917 for r in revs:
918 checkrev(r)
918 checkrev(r)
919 # and we're sure ancestors aren't filtered as well
919 # and we're sure ancestors aren't filtered as well
920
920
921 if rustancestor is not None:
921 if rustancestor is not None:
922 lazyancestors = rustancestor.LazyAncestors
922 lazyancestors = rustancestor.LazyAncestors
923 arg = self.index
923 arg = self.index
924 else:
924 else:
925 lazyancestors = ancestor.lazyancestors
925 lazyancestors = ancestor.lazyancestors
926 arg = self._uncheckedparentrevs
926 arg = self._uncheckedparentrevs
927 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
927 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
928
928
929 def descendants(self, revs):
929 def descendants(self, revs):
930 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
930 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
931
931
932 def findcommonmissing(self, common=None, heads=None):
932 def findcommonmissing(self, common=None, heads=None):
933 """Return a tuple of the ancestors of common and the ancestors of heads
933 """Return a tuple of the ancestors of common and the ancestors of heads
934 that are not ancestors of common. In revset terminology, we return the
934 that are not ancestors of common. In revset terminology, we return the
935 tuple:
935 tuple:
936
936
937 ::common, (::heads) - (::common)
937 ::common, (::heads) - (::common)
938
938
939 The list is sorted by revision number, meaning it is
939 The list is sorted by revision number, meaning it is
940 topologically sorted.
940 topologically sorted.
941
941
942 'heads' and 'common' are both lists of node IDs. If heads is
942 'heads' and 'common' are both lists of node IDs. If heads is
943 not supplied, uses all of the revlog's heads. If common is not
943 not supplied, uses all of the revlog's heads. If common is not
944 supplied, uses nullid."""
944 supplied, uses nullid."""
945 if common is None:
945 if common is None:
946 common = [self.nullid]
946 common = [self.nullid]
947 if heads is None:
947 if heads is None:
948 heads = self.heads()
948 heads = self.heads()
949
949
950 common = [self.rev(n) for n in common]
950 common = [self.rev(n) for n in common]
951 heads = [self.rev(n) for n in heads]
951 heads = [self.rev(n) for n in heads]
952
952
953 # we want the ancestors, but inclusive
953 # we want the ancestors, but inclusive
954 class lazyset(object):
954 class lazyset(object):
955 def __init__(self, lazyvalues):
955 def __init__(self, lazyvalues):
956 self.addedvalues = set()
956 self.addedvalues = set()
957 self.lazyvalues = lazyvalues
957 self.lazyvalues = lazyvalues
958
958
959 def __contains__(self, value):
959 def __contains__(self, value):
960 return value in self.addedvalues or value in self.lazyvalues
960 return value in self.addedvalues or value in self.lazyvalues
961
961
962 def __iter__(self):
962 def __iter__(self):
963 added = self.addedvalues
963 added = self.addedvalues
964 for r in added:
964 for r in added:
965 yield r
965 yield r
966 for r in self.lazyvalues:
966 for r in self.lazyvalues:
967 if not r in added:
967 if not r in added:
968 yield r
968 yield r
969
969
970 def add(self, value):
970 def add(self, value):
971 self.addedvalues.add(value)
971 self.addedvalues.add(value)
972
972
973 def update(self, values):
973 def update(self, values):
974 self.addedvalues.update(values)
974 self.addedvalues.update(values)
975
975
976 has = lazyset(self.ancestors(common))
976 has = lazyset(self.ancestors(common))
977 has.add(nullrev)
977 has.add(nullrev)
978 has.update(common)
978 has.update(common)
979
979
980 # take all ancestors from heads that aren't in has
980 # take all ancestors from heads that aren't in has
981 missing = set()
981 missing = set()
982 visit = collections.deque(r for r in heads if r not in has)
982 visit = collections.deque(r for r in heads if r not in has)
983 while visit:
983 while visit:
984 r = visit.popleft()
984 r = visit.popleft()
985 if r in missing:
985 if r in missing:
986 continue
986 continue
987 else:
987 else:
988 missing.add(r)
988 missing.add(r)
989 for p in self.parentrevs(r):
989 for p in self.parentrevs(r):
990 if p not in has:
990 if p not in has:
991 visit.append(p)
991 visit.append(p)
992 missing = list(missing)
992 missing = list(missing)
993 missing.sort()
993 missing.sort()
994 return has, [self.node(miss) for miss in missing]
994 return has, [self.node(miss) for miss in missing]
995
995
996 def incrementalmissingrevs(self, common=None):
996 def incrementalmissingrevs(self, common=None):
997 """Return an object that can be used to incrementally compute the
997 """Return an object that can be used to incrementally compute the
998 revision numbers of the ancestors of arbitrary sets that are not
998 revision numbers of the ancestors of arbitrary sets that are not
999 ancestors of common. This is an ancestor.incrementalmissingancestors
999 ancestors of common. This is an ancestor.incrementalmissingancestors
1000 object.
1000 object.
1001
1001
1002 'common' is a list of revision numbers. If common is not supplied, uses
1002 'common' is a list of revision numbers. If common is not supplied, uses
1003 nullrev.
1003 nullrev.
1004 """
1004 """
1005 if common is None:
1005 if common is None:
1006 common = [nullrev]
1006 common = [nullrev]
1007
1007
1008 if rustancestor is not None:
1008 if rustancestor is not None:
1009 return rustancestor.MissingAncestors(self.index, common)
1009 return rustancestor.MissingAncestors(self.index, common)
1010 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1010 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1011
1011
1012 def findmissingrevs(self, common=None, heads=None):
1012 def findmissingrevs(self, common=None, heads=None):
1013 """Return the revision numbers of the ancestors of heads that
1013 """Return the revision numbers of the ancestors of heads that
1014 are not ancestors of common.
1014 are not ancestors of common.
1015
1015
1016 More specifically, return a list of revision numbers corresponding to
1016 More specifically, return a list of revision numbers corresponding to
1017 nodes N such that every N satisfies the following constraints:
1017 nodes N such that every N satisfies the following constraints:
1018
1018
1019 1. N is an ancestor of some node in 'heads'
1019 1. N is an ancestor of some node in 'heads'
1020 2. N is not an ancestor of any node in 'common'
1020 2. N is not an ancestor of any node in 'common'
1021
1021
1022 The list is sorted by revision number, meaning it is
1022 The list is sorted by revision number, meaning it is
1023 topologically sorted.
1023 topologically sorted.
1024
1024
1025 'heads' and 'common' are both lists of revision numbers. If heads is
1025 'heads' and 'common' are both lists of revision numbers. If heads is
1026 not supplied, uses all of the revlog's heads. If common is not
1026 not supplied, uses all of the revlog's heads. If common is not
1027 supplied, uses nullid."""
1027 supplied, uses nullid."""
1028 if common is None:
1028 if common is None:
1029 common = [nullrev]
1029 common = [nullrev]
1030 if heads is None:
1030 if heads is None:
1031 heads = self.headrevs()
1031 heads = self.headrevs()
1032
1032
1033 inc = self.incrementalmissingrevs(common=common)
1033 inc = self.incrementalmissingrevs(common=common)
1034 return inc.missingancestors(heads)
1034 return inc.missingancestors(heads)
1035
1035
1036 def findmissing(self, common=None, heads=None):
1036 def findmissing(self, common=None, heads=None):
1037 """Return the ancestors of heads that are not ancestors of common.
1037 """Return the ancestors of heads that are not ancestors of common.
1038
1038
1039 More specifically, return a list of nodes N such that every N
1039 More specifically, return a list of nodes N such that every N
1040 satisfies the following constraints:
1040 satisfies the following constraints:
1041
1041
1042 1. N is an ancestor of some node in 'heads'
1042 1. N is an ancestor of some node in 'heads'
1043 2. N is not an ancestor of any node in 'common'
1043 2. N is not an ancestor of any node in 'common'
1044
1044
1045 The list is sorted by revision number, meaning it is
1045 The list is sorted by revision number, meaning it is
1046 topologically sorted.
1046 topologically sorted.
1047
1047
1048 'heads' and 'common' are both lists of node IDs. If heads is
1048 'heads' and 'common' are both lists of node IDs. If heads is
1049 not supplied, uses all of the revlog's heads. If common is not
1049 not supplied, uses all of the revlog's heads. If common is not
1050 supplied, uses nullid."""
1050 supplied, uses nullid."""
1051 if common is None:
1051 if common is None:
1052 common = [self.nullid]
1052 common = [self.nullid]
1053 if heads is None:
1053 if heads is None:
1054 heads = self.heads()
1054 heads = self.heads()
1055
1055
1056 common = [self.rev(n) for n in common]
1056 common = [self.rev(n) for n in common]
1057 heads = [self.rev(n) for n in heads]
1057 heads = [self.rev(n) for n in heads]
1058
1058
1059 inc = self.incrementalmissingrevs(common=common)
1059 inc = self.incrementalmissingrevs(common=common)
1060 return [self.node(r) for r in inc.missingancestors(heads)]
1060 return [self.node(r) for r in inc.missingancestors(heads)]
1061
1061
1062 def nodesbetween(self, roots=None, heads=None):
1062 def nodesbetween(self, roots=None, heads=None):
1063 """Return a topological path from 'roots' to 'heads'.
1063 """Return a topological path from 'roots' to 'heads'.
1064
1064
1065 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1065 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1066 topologically sorted list of all nodes N that satisfy both of
1066 topologically sorted list of all nodes N that satisfy both of
1067 these constraints:
1067 these constraints:
1068
1068
1069 1. N is a descendant of some node in 'roots'
1069 1. N is a descendant of some node in 'roots'
1070 2. N is an ancestor of some node in 'heads'
1070 2. N is an ancestor of some node in 'heads'
1071
1071
1072 Every node is considered to be both a descendant and an ancestor
1072 Every node is considered to be both a descendant and an ancestor
1073 of itself, so every reachable node in 'roots' and 'heads' will be
1073 of itself, so every reachable node in 'roots' and 'heads' will be
1074 included in 'nodes'.
1074 included in 'nodes'.
1075
1075
1076 'outroots' is the list of reachable nodes in 'roots', i.e., the
1076 'outroots' is the list of reachable nodes in 'roots', i.e., the
1077 subset of 'roots' that is returned in 'nodes'. Likewise,
1077 subset of 'roots' that is returned in 'nodes'. Likewise,
1078 'outheads' is the subset of 'heads' that is also in 'nodes'.
1078 'outheads' is the subset of 'heads' that is also in 'nodes'.
1079
1079
1080 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1080 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1081 unspecified, uses nullid as the only root. If 'heads' is
1081 unspecified, uses nullid as the only root. If 'heads' is
1082 unspecified, uses list of all of the revlog's heads."""
1082 unspecified, uses list of all of the revlog's heads."""
1083 nonodes = ([], [], [])
1083 nonodes = ([], [], [])
1084 if roots is not None:
1084 if roots is not None:
1085 roots = list(roots)
1085 roots = list(roots)
1086 if not roots:
1086 if not roots:
1087 return nonodes
1087 return nonodes
1088 lowestrev = min([self.rev(n) for n in roots])
1088 lowestrev = min([self.rev(n) for n in roots])
1089 else:
1089 else:
1090 roots = [self.nullid] # Everybody's a descendant of nullid
1090 roots = [self.nullid] # Everybody's a descendant of nullid
1091 lowestrev = nullrev
1091 lowestrev = nullrev
1092 if (lowestrev == nullrev) and (heads is None):
1092 if (lowestrev == nullrev) and (heads is None):
1093 # We want _all_ the nodes!
1093 # We want _all_ the nodes!
1094 return (
1094 return (
1095 [self.node(r) for r in self],
1095 [self.node(r) for r in self],
1096 [self.nullid],
1096 [self.nullid],
1097 list(self.heads()),
1097 list(self.heads()),
1098 )
1098 )
1099 if heads is None:
1099 if heads is None:
1100 # All nodes are ancestors, so the latest ancestor is the last
1100 # All nodes are ancestors, so the latest ancestor is the last
1101 # node.
1101 # node.
1102 highestrev = len(self) - 1
1102 highestrev = len(self) - 1
1103 # Set ancestors to None to signal that every node is an ancestor.
1103 # Set ancestors to None to signal that every node is an ancestor.
1104 ancestors = None
1104 ancestors = None
1105 # Set heads to an empty dictionary for later discovery of heads
1105 # Set heads to an empty dictionary for later discovery of heads
1106 heads = {}
1106 heads = {}
1107 else:
1107 else:
1108 heads = list(heads)
1108 heads = list(heads)
1109 if not heads:
1109 if not heads:
1110 return nonodes
1110 return nonodes
1111 ancestors = set()
1111 ancestors = set()
1112 # Turn heads into a dictionary so we can remove 'fake' heads.
1112 # Turn heads into a dictionary so we can remove 'fake' heads.
1113 # Also, later we will be using it to filter out the heads we can't
1113 # Also, later we will be using it to filter out the heads we can't
1114 # find from roots.
1114 # find from roots.
1115 heads = dict.fromkeys(heads, False)
1115 heads = dict.fromkeys(heads, False)
1116 # Start at the top and keep marking parents until we're done.
1116 # Start at the top and keep marking parents until we're done.
1117 nodestotag = set(heads)
1117 nodestotag = set(heads)
1118 # Remember where the top was so we can use it as a limit later.
1118 # Remember where the top was so we can use it as a limit later.
1119 highestrev = max([self.rev(n) for n in nodestotag])
1119 highestrev = max([self.rev(n) for n in nodestotag])
1120 while nodestotag:
1120 while nodestotag:
1121 # grab a node to tag
1121 # grab a node to tag
1122 n = nodestotag.pop()
1122 n = nodestotag.pop()
1123 # Never tag nullid
1123 # Never tag nullid
1124 if n == self.nullid:
1124 if n == self.nullid:
1125 continue
1125 continue
1126 # A node's revision number represents its place in a
1126 # A node's revision number represents its place in a
1127 # topologically sorted list of nodes.
1127 # topologically sorted list of nodes.
1128 r = self.rev(n)
1128 r = self.rev(n)
1129 if r >= lowestrev:
1129 if r >= lowestrev:
1130 if n not in ancestors:
1130 if n not in ancestors:
1131 # If we are possibly a descendant of one of the roots
1131 # If we are possibly a descendant of one of the roots
1132 # and we haven't already been marked as an ancestor
1132 # and we haven't already been marked as an ancestor
1133 ancestors.add(n) # Mark as ancestor
1133 ancestors.add(n) # Mark as ancestor
1134 # Add non-nullid parents to list of nodes to tag.
1134 # Add non-nullid parents to list of nodes to tag.
1135 nodestotag.update(
1135 nodestotag.update(
1136 [p for p in self.parents(n) if p != self.nullid]
1136 [p for p in self.parents(n) if p != self.nullid]
1137 )
1137 )
1138 elif n in heads: # We've seen it before, is it a fake head?
1138 elif n in heads: # We've seen it before, is it a fake head?
1139 # So it is, real heads should not be the ancestors of
1139 # So it is, real heads should not be the ancestors of
1140 # any other heads.
1140 # any other heads.
1141 heads.pop(n)
1141 heads.pop(n)
1142 if not ancestors:
1142 if not ancestors:
1143 return nonodes
1143 return nonodes
1144 # Now that we have our set of ancestors, we want to remove any
1144 # Now that we have our set of ancestors, we want to remove any
1145 # roots that are not ancestors.
1145 # roots that are not ancestors.
1146
1146
1147 # If one of the roots was nullid, everything is included anyway.
1147 # If one of the roots was nullid, everything is included anyway.
1148 if lowestrev > nullrev:
1148 if lowestrev > nullrev:
1149 # But, since we weren't, let's recompute the lowest rev to not
1149 # But, since we weren't, let's recompute the lowest rev to not
1150 # include roots that aren't ancestors.
1150 # include roots that aren't ancestors.
1151
1151
1152 # Filter out roots that aren't ancestors of heads
1152 # Filter out roots that aren't ancestors of heads
1153 roots = [root for root in roots if root in ancestors]
1153 roots = [root for root in roots if root in ancestors]
1154 # Recompute the lowest revision
1154 # Recompute the lowest revision
1155 if roots:
1155 if roots:
1156 lowestrev = min([self.rev(root) for root in roots])
1156 lowestrev = min([self.rev(root) for root in roots])
1157 else:
1157 else:
1158 # No more roots? Return empty list
1158 # No more roots? Return empty list
1159 return nonodes
1159 return nonodes
1160 else:
1160 else:
1161 # We are descending from nullid, and don't need to care about
1161 # We are descending from nullid, and don't need to care about
1162 # any other roots.
1162 # any other roots.
1163 lowestrev = nullrev
1163 lowestrev = nullrev
1164 roots = [self.nullid]
1164 roots = [self.nullid]
1165 # Transform our roots list into a set.
1165 # Transform our roots list into a set.
1166 descendants = set(roots)
1166 descendants = set(roots)
1167 # Also, keep the original roots so we can filter out roots that aren't
1167 # Also, keep the original roots so we can filter out roots that aren't
1168 # 'real' roots (i.e. are descended from other roots).
1168 # 'real' roots (i.e. are descended from other roots).
1169 roots = descendants.copy()
1169 roots = descendants.copy()
1170 # Our topologically sorted list of output nodes.
1170 # Our topologically sorted list of output nodes.
1171 orderedout = []
1171 orderedout = []
1172 # Don't start at nullid since we don't want nullid in our output list,
1172 # Don't start at nullid since we don't want nullid in our output list,
1173 # and if nullid shows up in descendants, empty parents will look like
1173 # and if nullid shows up in descendants, empty parents will look like
1174 # they're descendants.
1174 # they're descendants.
1175 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1175 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1176 n = self.node(r)
1176 n = self.node(r)
1177 isdescendant = False
1177 isdescendant = False
1178 if lowestrev == nullrev: # Everybody is a descendant of nullid
1178 if lowestrev == nullrev: # Everybody is a descendant of nullid
1179 isdescendant = True
1179 isdescendant = True
1180 elif n in descendants:
1180 elif n in descendants:
1181 # n is already a descendant
1181 # n is already a descendant
1182 isdescendant = True
1182 isdescendant = True
1183 # This check only needs to be done here because all the roots
1183 # This check only needs to be done here because all the roots
1184 # will start being marked is descendants before the loop.
1184 # will start being marked is descendants before the loop.
1185 if n in roots:
1185 if n in roots:
1186 # If n was a root, check if it's a 'real' root.
1186 # If n was a root, check if it's a 'real' root.
1187 p = tuple(self.parents(n))
1187 p = tuple(self.parents(n))
1188 # If any of its parents are descendants, it's not a root.
1188 # If any of its parents are descendants, it's not a root.
1189 if (p[0] in descendants) or (p[1] in descendants):
1189 if (p[0] in descendants) or (p[1] in descendants):
1190 roots.remove(n)
1190 roots.remove(n)
1191 else:
1191 else:
1192 p = tuple(self.parents(n))
1192 p = tuple(self.parents(n))
1193 # A node is a descendant if either of its parents are
1193 # A node is a descendant if either of its parents are
1194 # descendants. (We seeded the dependents list with the roots
1194 # descendants. (We seeded the dependents list with the roots
1195 # up there, remember?)
1195 # up there, remember?)
1196 if (p[0] in descendants) or (p[1] in descendants):
1196 if (p[0] in descendants) or (p[1] in descendants):
1197 descendants.add(n)
1197 descendants.add(n)
1198 isdescendant = True
1198 isdescendant = True
1199 if isdescendant and ((ancestors is None) or (n in ancestors)):
1199 if isdescendant and ((ancestors is None) or (n in ancestors)):
1200 # Only include nodes that are both descendants and ancestors.
1200 # Only include nodes that are both descendants and ancestors.
1201 orderedout.append(n)
1201 orderedout.append(n)
1202 if (ancestors is not None) and (n in heads):
1202 if (ancestors is not None) and (n in heads):
1203 # We're trying to figure out which heads are reachable
1203 # We're trying to figure out which heads are reachable
1204 # from roots.
1204 # from roots.
1205 # Mark this head as having been reached
1205 # Mark this head as having been reached
1206 heads[n] = True
1206 heads[n] = True
1207 elif ancestors is None:
1207 elif ancestors is None:
1208 # Otherwise, we're trying to discover the heads.
1208 # Otherwise, we're trying to discover the heads.
1209 # Assume this is a head because if it isn't, the next step
1209 # Assume this is a head because if it isn't, the next step
1210 # will eventually remove it.
1210 # will eventually remove it.
1211 heads[n] = True
1211 heads[n] = True
1212 # But, obviously its parents aren't.
1212 # But, obviously its parents aren't.
1213 for p in self.parents(n):
1213 for p in self.parents(n):
1214 heads.pop(p, None)
1214 heads.pop(p, None)
1215 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1215 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1216 roots = list(roots)
1216 roots = list(roots)
1217 assert orderedout
1217 assert orderedout
1218 assert roots
1218 assert roots
1219 assert heads
1219 assert heads
1220 return (orderedout, roots, heads)
1220 return (orderedout, roots, heads)
1221
1221
1222 def headrevs(self, revs=None):
1222 def headrevs(self, revs=None):
1223 if revs is None:
1223 if revs is None:
1224 try:
1224 try:
1225 return self.index.headrevs()
1225 return self.index.headrevs()
1226 except AttributeError:
1226 except AttributeError:
1227 return self._headrevs()
1227 return self._headrevs()
1228 if rustdagop is not None:
1228 if rustdagop is not None:
1229 return rustdagop.headrevs(self.index, revs)
1229 return rustdagop.headrevs(self.index, revs)
1230 return dagop.headrevs(revs, self._uncheckedparentrevs)
1230 return dagop.headrevs(revs, self._uncheckedparentrevs)
1231
1231
1232 def computephases(self, roots):
1232 def computephases(self, roots):
1233 return self.index.computephasesmapsets(roots)
1233 return self.index.computephasesmapsets(roots)
1234
1234
1235 def _headrevs(self):
1235 def _headrevs(self):
1236 count = len(self)
1236 count = len(self)
1237 if not count:
1237 if not count:
1238 return [nullrev]
1238 return [nullrev]
1239 # we won't iter over filtered rev so nobody is a head at start
1239 # we won't iter over filtered rev so nobody is a head at start
1240 ishead = [0] * (count + 1)
1240 ishead = [0] * (count + 1)
1241 index = self.index
1241 index = self.index
1242 for r in self:
1242 for r in self:
1243 ishead[r] = 1 # I may be an head
1243 ishead[r] = 1 # I may be an head
1244 e = index[r]
1244 e = index[r]
1245 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1245 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1246 return [r for r, val in enumerate(ishead) if val]
1246 return [r for r, val in enumerate(ishead) if val]
1247
1247
1248 def heads(self, start=None, stop=None):
1248 def heads(self, start=None, stop=None):
1249 """return the list of all nodes that have no children
1249 """return the list of all nodes that have no children
1250
1250
1251 if start is specified, only heads that are descendants of
1251 if start is specified, only heads that are descendants of
1252 start will be returned
1252 start will be returned
1253 if stop is specified, it will consider all the revs from stop
1253 if stop is specified, it will consider all the revs from stop
1254 as if they had no children
1254 as if they had no children
1255 """
1255 """
1256 if start is None and stop is None:
1256 if start is None and stop is None:
1257 if not len(self):
1257 if not len(self):
1258 return [self.nullid]
1258 return [self.nullid]
1259 return [self.node(r) for r in self.headrevs()]
1259 return [self.node(r) for r in self.headrevs()]
1260
1260
1261 if start is None:
1261 if start is None:
1262 start = nullrev
1262 start = nullrev
1263 else:
1263 else:
1264 start = self.rev(start)
1264 start = self.rev(start)
1265
1265
1266 stoprevs = {self.rev(n) for n in stop or []}
1266 stoprevs = {self.rev(n) for n in stop or []}
1267
1267
1268 revs = dagop.headrevssubset(
1268 revs = dagop.headrevssubset(
1269 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1269 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1270 )
1270 )
1271
1271
1272 return [self.node(rev) for rev in revs]
1272 return [self.node(rev) for rev in revs]
1273
1273
1274 def children(self, node):
1274 def children(self, node):
1275 """find the children of a given node"""
1275 """find the children of a given node"""
1276 c = []
1276 c = []
1277 p = self.rev(node)
1277 p = self.rev(node)
1278 for r in self.revs(start=p + 1):
1278 for r in self.revs(start=p + 1):
1279 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1279 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1280 if prevs:
1280 if prevs:
1281 for pr in prevs:
1281 for pr in prevs:
1282 if pr == p:
1282 if pr == p:
1283 c.append(self.node(r))
1283 c.append(self.node(r))
1284 elif p == nullrev:
1284 elif p == nullrev:
1285 c.append(self.node(r))
1285 c.append(self.node(r))
1286 return c
1286 return c
1287
1287
1288 def commonancestorsheads(self, a, b):
1288 def commonancestorsheads(self, a, b):
1289 """calculate all the heads of the common ancestors of nodes a and b"""
1289 """calculate all the heads of the common ancestors of nodes a and b"""
1290 a, b = self.rev(a), self.rev(b)
1290 a, b = self.rev(a), self.rev(b)
1291 ancs = self._commonancestorsheads(a, b)
1291 ancs = self._commonancestorsheads(a, b)
1292 return pycompat.maplist(self.node, ancs)
1292 return pycompat.maplist(self.node, ancs)
1293
1293
1294 def _commonancestorsheads(self, *revs):
1294 def _commonancestorsheads(self, *revs):
1295 """calculate all the heads of the common ancestors of revs"""
1295 """calculate all the heads of the common ancestors of revs"""
1296 try:
1296 try:
1297 ancs = self.index.commonancestorsheads(*revs)
1297 ancs = self.index.commonancestorsheads(*revs)
1298 except (AttributeError, OverflowError): # C implementation failed
1298 except (AttributeError, OverflowError): # C implementation failed
1299 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1299 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1300 return ancs
1300 return ancs
1301
1301
1302 def isancestor(self, a, b):
1302 def isancestor(self, a, b):
1303 """return True if node a is an ancestor of node b
1303 """return True if node a is an ancestor of node b
1304
1304
1305 A revision is considered an ancestor of itself."""
1305 A revision is considered an ancestor of itself."""
1306 a, b = self.rev(a), self.rev(b)
1306 a, b = self.rev(a), self.rev(b)
1307 return self.isancestorrev(a, b)
1307 return self.isancestorrev(a, b)
1308
1308
1309 def isancestorrev(self, a, b):
1309 def isancestorrev(self, a, b):
1310 """return True if revision a is an ancestor of revision b
1310 """return True if revision a is an ancestor of revision b
1311
1311
1312 A revision is considered an ancestor of itself.
1312 A revision is considered an ancestor of itself.
1313
1313
1314 The implementation of this is trivial but the use of
1314 The implementation of this is trivial but the use of
1315 reachableroots is not."""
1315 reachableroots is not."""
1316 if a == nullrev:
1316 if a == nullrev:
1317 return True
1317 return True
1318 elif a == b:
1318 elif a == b:
1319 return True
1319 return True
1320 elif a > b:
1320 elif a > b:
1321 return False
1321 return False
1322 return bool(self.reachableroots(a, [b], [a], includepath=False))
1322 return bool(self.reachableroots(a, [b], [a], includepath=False))
1323
1323
1324 def reachableroots(self, minroot, heads, roots, includepath=False):
1324 def reachableroots(self, minroot, heads, roots, includepath=False):
1325 """return (heads(::(<roots> and <roots>::<heads>)))
1325 """return (heads(::(<roots> and <roots>::<heads>)))
1326
1326
1327 If includepath is True, return (<roots>::<heads>)."""
1327 If includepath is True, return (<roots>::<heads>)."""
1328 try:
1328 try:
1329 return self.index.reachableroots2(
1329 return self.index.reachableroots2(
1330 minroot, heads, roots, includepath
1330 minroot, heads, roots, includepath
1331 )
1331 )
1332 except AttributeError:
1332 except AttributeError:
1333 return dagop._reachablerootspure(
1333 return dagop._reachablerootspure(
1334 self.parentrevs, minroot, roots, heads, includepath
1334 self.parentrevs, minroot, roots, heads, includepath
1335 )
1335 )
1336
1336
1337 def ancestor(self, a, b):
1337 def ancestor(self, a, b):
1338 """calculate the "best" common ancestor of nodes a and b"""
1338 """calculate the "best" common ancestor of nodes a and b"""
1339
1339
1340 a, b = self.rev(a), self.rev(b)
1340 a, b = self.rev(a), self.rev(b)
1341 try:
1341 try:
1342 ancs = self.index.ancestors(a, b)
1342 ancs = self.index.ancestors(a, b)
1343 except (AttributeError, OverflowError):
1343 except (AttributeError, OverflowError):
1344 ancs = ancestor.ancestors(self.parentrevs, a, b)
1344 ancs = ancestor.ancestors(self.parentrevs, a, b)
1345 if ancs:
1345 if ancs:
1346 # choose a consistent winner when there's a tie
1346 # choose a consistent winner when there's a tie
1347 return min(map(self.node, ancs))
1347 return min(map(self.node, ancs))
1348 return self.nullid
1348 return self.nullid
1349
1349
1350 def _match(self, id):
1350 def _match(self, id):
1351 if isinstance(id, int):
1351 if isinstance(id, int):
1352 # rev
1352 # rev
1353 return self.node(id)
1353 return self.node(id)
1354 if len(id) == self.nodeconstants.nodelen:
1354 if len(id) == self.nodeconstants.nodelen:
1355 # possibly a binary node
1355 # possibly a binary node
1356 # odds of a binary node being all hex in ASCII are 1 in 10**25
1356 # odds of a binary node being all hex in ASCII are 1 in 10**25
1357 try:
1357 try:
1358 node = id
1358 node = id
1359 self.rev(node) # quick search the index
1359 self.rev(node) # quick search the index
1360 return node
1360 return node
1361 except error.LookupError:
1361 except error.LookupError:
1362 pass # may be partial hex id
1362 pass # may be partial hex id
1363 try:
1363 try:
1364 # str(rev)
1364 # str(rev)
1365 rev = int(id)
1365 rev = int(id)
1366 if b"%d" % rev != id:
1366 if b"%d" % rev != id:
1367 raise ValueError
1367 raise ValueError
1368 if rev < 0:
1368 if rev < 0:
1369 rev = len(self) + rev
1369 rev = len(self) + rev
1370 if rev < 0 or rev >= len(self):
1370 if rev < 0 or rev >= len(self):
1371 raise ValueError
1371 raise ValueError
1372 return self.node(rev)
1372 return self.node(rev)
1373 except (ValueError, OverflowError):
1373 except (ValueError, OverflowError):
1374 pass
1374 pass
1375 if len(id) == 2 * self.nodeconstants.nodelen:
1375 if len(id) == 2 * self.nodeconstants.nodelen:
1376 try:
1376 try:
1377 # a full hex nodeid?
1377 # a full hex nodeid?
1378 node = bin(id)
1378 node = bin(id)
1379 self.rev(node)
1379 self.rev(node)
1380 return node
1380 return node
1381 except (TypeError, error.LookupError):
1381 except (TypeError, error.LookupError):
1382 pass
1382 pass
1383
1383
1384 def _partialmatch(self, id):
1384 def _partialmatch(self, id):
1385 # we don't care wdirfilenodeids as they should be always full hash
1385 # we don't care wdirfilenodeids as they should be always full hash
1386 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1386 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1387 try:
1387 try:
1388 partial = self.index.partialmatch(id)
1388 partial = self.index.partialmatch(id)
1389 if partial and self.hasnode(partial):
1389 if partial and self.hasnode(partial):
1390 if maybewdir:
1390 if maybewdir:
1391 # single 'ff...' match in radix tree, ambiguous with wdir
1391 # single 'ff...' match in radix tree, ambiguous with wdir
1392 raise error.RevlogError
1392 raise error.RevlogError
1393 return partial
1393 return partial
1394 if maybewdir:
1394 if maybewdir:
1395 # no 'ff...' match in radix tree, wdir identified
1395 # no 'ff...' match in radix tree, wdir identified
1396 raise error.WdirUnsupported
1396 raise error.WdirUnsupported
1397 return None
1397 return None
1398 except error.RevlogError:
1398 except error.RevlogError:
1399 # parsers.c radix tree lookup gave multiple matches
1399 # parsers.c radix tree lookup gave multiple matches
1400 # fast path: for unfiltered changelog, radix tree is accurate
1400 # fast path: for unfiltered changelog, radix tree is accurate
1401 if not getattr(self, 'filteredrevs', None):
1401 if not getattr(self, 'filteredrevs', None):
1402 raise error.AmbiguousPrefixLookupError(
1402 raise error.AmbiguousPrefixLookupError(
1403 id, self.display_id, _(b'ambiguous identifier')
1403 id, self.display_id, _(b'ambiguous identifier')
1404 )
1404 )
1405 # fall through to slow path that filters hidden revisions
1405 # fall through to slow path that filters hidden revisions
1406 except (AttributeError, ValueError):
1406 except (AttributeError, ValueError):
1407 # we are pure python, or key was too short to search radix tree
1407 # we are pure python, or key was too short to search radix tree
1408 pass
1408 pass
1409
1409
1410 if id in self._pcache:
1410 if id in self._pcache:
1411 return self._pcache[id]
1411 return self._pcache[id]
1412
1412
1413 if len(id) <= 40:
1413 if len(id) <= 40:
1414 try:
1414 try:
1415 # hex(node)[:...]
1415 # hex(node)[:...]
1416 l = len(id) // 2 # grab an even number of digits
1416 l = len(id) // 2 # grab an even number of digits
1417 prefix = bin(id[: l * 2])
1417 prefix = bin(id[: l * 2])
1418 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1418 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1419 nl = [
1419 nl = [
1420 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1420 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1421 ]
1421 ]
1422 if self.nodeconstants.nullhex.startswith(id):
1422 if self.nodeconstants.nullhex.startswith(id):
1423 nl.append(self.nullid)
1423 nl.append(self.nullid)
1424 if len(nl) > 0:
1424 if len(nl) > 0:
1425 if len(nl) == 1 and not maybewdir:
1425 if len(nl) == 1 and not maybewdir:
1426 self._pcache[id] = nl[0]
1426 self._pcache[id] = nl[0]
1427 return nl[0]
1427 return nl[0]
1428 raise error.AmbiguousPrefixLookupError(
1428 raise error.AmbiguousPrefixLookupError(
1429 id, self.display_id, _(b'ambiguous identifier')
1429 id, self.display_id, _(b'ambiguous identifier')
1430 )
1430 )
1431 if maybewdir:
1431 if maybewdir:
1432 raise error.WdirUnsupported
1432 raise error.WdirUnsupported
1433 return None
1433 return None
1434 except TypeError:
1434 except TypeError:
1435 pass
1435 pass
1436
1436
1437 def lookup(self, id):
1437 def lookup(self, id):
1438 """locate a node based on:
1438 """locate a node based on:
1439 - revision number or str(revision number)
1439 - revision number or str(revision number)
1440 - nodeid or subset of hex nodeid
1440 - nodeid or subset of hex nodeid
1441 """
1441 """
1442 n = self._match(id)
1442 n = self._match(id)
1443 if n is not None:
1443 if n is not None:
1444 return n
1444 return n
1445 n = self._partialmatch(id)
1445 n = self._partialmatch(id)
1446 if n:
1446 if n:
1447 return n
1447 return n
1448
1448
1449 raise error.LookupError(id, self.display_id, _(b'no match found'))
1449 raise error.LookupError(id, self.display_id, _(b'no match found'))
1450
1450
1451 def shortest(self, node, minlength=1):
1451 def shortest(self, node, minlength=1):
1452 """Find the shortest unambiguous prefix that matches node."""
1452 """Find the shortest unambiguous prefix that matches node."""
1453
1453
1454 def isvalid(prefix):
1454 def isvalid(prefix):
1455 try:
1455 try:
1456 matchednode = self._partialmatch(prefix)
1456 matchednode = self._partialmatch(prefix)
1457 except error.AmbiguousPrefixLookupError:
1457 except error.AmbiguousPrefixLookupError:
1458 return False
1458 return False
1459 except error.WdirUnsupported:
1459 except error.WdirUnsupported:
1460 # single 'ff...' match
1460 # single 'ff...' match
1461 return True
1461 return True
1462 if matchednode is None:
1462 if matchednode is None:
1463 raise error.LookupError(node, self.display_id, _(b'no node'))
1463 raise error.LookupError(node, self.display_id, _(b'no node'))
1464 return True
1464 return True
1465
1465
1466 def maybewdir(prefix):
1466 def maybewdir(prefix):
1467 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1467 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1468
1468
1469 hexnode = hex(node)
1469 hexnode = hex(node)
1470
1470
1471 def disambiguate(hexnode, minlength):
1471 def disambiguate(hexnode, minlength):
1472 """Disambiguate against wdirid."""
1472 """Disambiguate against wdirid."""
1473 for length in range(minlength, len(hexnode) + 1):
1473 for length in range(minlength, len(hexnode) + 1):
1474 prefix = hexnode[:length]
1474 prefix = hexnode[:length]
1475 if not maybewdir(prefix):
1475 if not maybewdir(prefix):
1476 return prefix
1476 return prefix
1477
1477
1478 if not getattr(self, 'filteredrevs', None):
1478 if not getattr(self, 'filteredrevs', None):
1479 try:
1479 try:
1480 length = max(self.index.shortest(node), minlength)
1480 length = max(self.index.shortest(node), minlength)
1481 return disambiguate(hexnode, length)
1481 return disambiguate(hexnode, length)
1482 except error.RevlogError:
1482 except error.RevlogError:
1483 if node != self.nodeconstants.wdirid:
1483 if node != self.nodeconstants.wdirid:
1484 raise error.LookupError(
1484 raise error.LookupError(
1485 node, self.display_id, _(b'no node')
1485 node, self.display_id, _(b'no node')
1486 )
1486 )
1487 except AttributeError:
1487 except AttributeError:
1488 # Fall through to pure code
1488 # Fall through to pure code
1489 pass
1489 pass
1490
1490
1491 if node == self.nodeconstants.wdirid:
1491 if node == self.nodeconstants.wdirid:
1492 for length in range(minlength, len(hexnode) + 1):
1492 for length in range(minlength, len(hexnode) + 1):
1493 prefix = hexnode[:length]
1493 prefix = hexnode[:length]
1494 if isvalid(prefix):
1494 if isvalid(prefix):
1495 return prefix
1495 return prefix
1496
1496
1497 for length in range(minlength, len(hexnode) + 1):
1497 for length in range(minlength, len(hexnode) + 1):
1498 prefix = hexnode[:length]
1498 prefix = hexnode[:length]
1499 if isvalid(prefix):
1499 if isvalid(prefix):
1500 return disambiguate(hexnode, length)
1500 return disambiguate(hexnode, length)
1501
1501
1502 def cmp(self, node, text):
1502 def cmp(self, node, text):
1503 """compare text with a given file revision
1503 """compare text with a given file revision
1504
1504
1505 returns True if text is different than what is stored.
1505 returns True if text is different than what is stored.
1506 """
1506 """
1507 p1, p2 = self.parents(node)
1507 p1, p2 = self.parents(node)
1508 return storageutil.hashrevisionsha1(text, p1, p2) != node
1508 return storageutil.hashrevisionsha1(text, p1, p2) != node
1509
1509
1510 def _cachesegment(self, offset, data):
1510 def _cachesegment(self, offset, data):
1511 """Add a segment to the revlog cache.
1511 """Add a segment to the revlog cache.
1512
1512
1513 Accepts an absolute offset and the data that is at that location.
1513 Accepts an absolute offset and the data that is at that location.
1514 """
1514 """
1515 o, d = self._chunkcache
1515 o, d = self._chunkcache
1516 # try to add to existing cache
1516 # try to add to existing cache
1517 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1517 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1518 self._chunkcache = o, d + data
1518 self._chunkcache = o, d + data
1519 else:
1519 else:
1520 self._chunkcache = offset, data
1520 self._chunkcache = offset, data
1521
1521
1522 def _readsegment(self, offset, length, df=None):
1522 def _readsegment(self, offset, length, df=None):
1523 """Load a segment of raw data from the revlog.
1523 """Load a segment of raw data from the revlog.
1524
1524
1525 Accepts an absolute offset, length to read, and an optional existing
1525 Accepts an absolute offset, length to read, and an optional existing
1526 file handle to read from.
1526 file handle to read from.
1527
1527
1528 If an existing file handle is passed, it will be seeked and the
1528 If an existing file handle is passed, it will be seeked and the
1529 original seek position will NOT be restored.
1529 original seek position will NOT be restored.
1530
1530
1531 Returns a str or buffer of raw byte data.
1531 Returns a str or buffer of raw byte data.
1532
1532
1533 Raises if the requested number of bytes could not be read.
1533 Raises if the requested number of bytes could not be read.
1534 """
1534 """
1535 # Cache data both forward and backward around the requested
1535 # Cache data both forward and backward around the requested
1536 # data, in a fixed size window. This helps speed up operations
1536 # data, in a fixed size window. This helps speed up operations
1537 # involving reading the revlog backwards.
1537 # involving reading the revlog backwards.
1538 cachesize = self._chunkcachesize
1538 cachesize = self._chunkcachesize
1539 realoffset = offset & ~(cachesize - 1)
1539 realoffset = offset & ~(cachesize - 1)
1540 reallength = (
1540 reallength = (
1541 (offset + length + cachesize) & ~(cachesize - 1)
1541 (offset + length + cachesize) & ~(cachesize - 1)
1542 ) - realoffset
1542 ) - realoffset
1543 with self._datareadfp(df) as df:
1543 with self._datareadfp(df) as df:
1544 df.seek(realoffset)
1544 df.seek(realoffset)
1545 d = df.read(reallength)
1545 d = df.read(reallength)
1546
1546
1547 self._cachesegment(realoffset, d)
1547 self._cachesegment(realoffset, d)
1548 if offset != realoffset or reallength != length:
1548 if offset != realoffset or reallength != length:
1549 startoffset = offset - realoffset
1549 startoffset = offset - realoffset
1550 if len(d) - startoffset < length:
1550 if len(d) - startoffset < length:
1551 raise error.RevlogError(
1551 raise error.RevlogError(
1552 _(
1552 _(
1553 b'partial read of revlog %s; expected %d bytes from '
1553 b'partial read of revlog %s; expected %d bytes from '
1554 b'offset %d, got %d'
1554 b'offset %d, got %d'
1555 )
1555 )
1556 % (
1556 % (
1557 self._indexfile if self._inline else self._datafile,
1557 self._indexfile if self._inline else self._datafile,
1558 length,
1558 length,
1559 realoffset,
1559 realoffset,
1560 len(d) - startoffset,
1560 len(d) - startoffset,
1561 )
1561 )
1562 )
1562 )
1563
1563
1564 return util.buffer(d, startoffset, length)
1564 return util.buffer(d, startoffset, length)
1565
1565
1566 if len(d) < length:
1566 if len(d) < length:
1567 raise error.RevlogError(
1567 raise error.RevlogError(
1568 _(
1568 _(
1569 b'partial read of revlog %s; expected %d bytes from offset '
1569 b'partial read of revlog %s; expected %d bytes from offset '
1570 b'%d, got %d'
1570 b'%d, got %d'
1571 )
1571 )
1572 % (
1572 % (
1573 self._indexfile if self._inline else self._datafile,
1573 self._indexfile if self._inline else self._datafile,
1574 length,
1574 length,
1575 offset,
1575 offset,
1576 len(d),
1576 len(d),
1577 )
1577 )
1578 )
1578 )
1579
1579
1580 return d
1580 return d
1581
1581
1582 def _getsegment(self, offset, length, df=None):
1582 def _getsegment(self, offset, length, df=None):
1583 """Obtain a segment of raw data from the revlog.
1583 """Obtain a segment of raw data from the revlog.
1584
1584
1585 Accepts an absolute offset, length of bytes to obtain, and an
1585 Accepts an absolute offset, length of bytes to obtain, and an
1586 optional file handle to the already-opened revlog. If the file
1586 optional file handle to the already-opened revlog. If the file
1587 handle is used, it's original seek position will not be preserved.
1587 handle is used, it's original seek position will not be preserved.
1588
1588
1589 Requests for data may be returned from a cache.
1589 Requests for data may be returned from a cache.
1590
1590
1591 Returns a str or a buffer instance of raw byte data.
1591 Returns a str or a buffer instance of raw byte data.
1592 """
1592 """
1593 o, d = self._chunkcache
1593 o, d = self._chunkcache
1594 l = len(d)
1594 l = len(d)
1595
1595
1596 # is it in the cache?
1596 # is it in the cache?
1597 cachestart = offset - o
1597 cachestart = offset - o
1598 cacheend = cachestart + length
1598 cacheend = cachestart + length
1599 if cachestart >= 0 and cacheend <= l:
1599 if cachestart >= 0 and cacheend <= l:
1600 if cachestart == 0 and cacheend == l:
1600 if cachestart == 0 and cacheend == l:
1601 return d # avoid a copy
1601 return d # avoid a copy
1602 return util.buffer(d, cachestart, cacheend - cachestart)
1602 return util.buffer(d, cachestart, cacheend - cachestart)
1603
1603
1604 return self._readsegment(offset, length, df=df)
1604 return self._readsegment(offset, length, df=df)
1605
1605
1606 def _getsegmentforrevs(self, startrev, endrev, df=None):
1606 def _getsegmentforrevs(self, startrev, endrev, df=None):
1607 """Obtain a segment of raw data corresponding to a range of revisions.
1607 """Obtain a segment of raw data corresponding to a range of revisions.
1608
1608
1609 Accepts the start and end revisions and an optional already-open
1609 Accepts the start and end revisions and an optional already-open
1610 file handle to be used for reading. If the file handle is read, its
1610 file handle to be used for reading. If the file handle is read, its
1611 seek position will not be preserved.
1611 seek position will not be preserved.
1612
1612
1613 Requests for data may be satisfied by a cache.
1613 Requests for data may be satisfied by a cache.
1614
1614
1615 Returns a 2-tuple of (offset, data) for the requested range of
1615 Returns a 2-tuple of (offset, data) for the requested range of
1616 revisions. Offset is the integer offset from the beginning of the
1616 revisions. Offset is the integer offset from the beginning of the
1617 revlog and data is a str or buffer of the raw byte data.
1617 revlog and data is a str or buffer of the raw byte data.
1618
1618
1619 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1619 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1620 to determine where each revision's data begins and ends.
1620 to determine where each revision's data begins and ends.
1621 """
1621 """
1622 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1622 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1623 # (functions are expensive).
1623 # (functions are expensive).
1624 index = self.index
1624 index = self.index
1625 istart = index[startrev]
1625 istart = index[startrev]
1626 start = int(istart[0] >> 16)
1626 start = int(istart[0] >> 16)
1627 if startrev == endrev:
1627 if startrev == endrev:
1628 end = start + istart[1]
1628 end = start + istart[1]
1629 else:
1629 else:
1630 iend = index[endrev]
1630 iend = index[endrev]
1631 end = int(iend[0] >> 16) + iend[1]
1631 end = int(iend[0] >> 16) + iend[1]
1632
1632
1633 if self._inline:
1633 if self._inline:
1634 start += (startrev + 1) * self.index.entry_size
1634 start += (startrev + 1) * self.index.entry_size
1635 end += (endrev + 1) * self.index.entry_size
1635 end += (endrev + 1) * self.index.entry_size
1636 length = end - start
1636 length = end - start
1637
1637
1638 return start, self._getsegment(start, length, df=df)
1638 return start, self._getsegment(start, length, df=df)
1639
1639
1640 def _chunk(self, rev, df=None):
1640 def _chunk(self, rev, df=None):
1641 """Obtain a single decompressed chunk for a revision.
1641 """Obtain a single decompressed chunk for a revision.
1642
1642
1643 Accepts an integer revision and an optional already-open file handle
1643 Accepts an integer revision and an optional already-open file handle
1644 to be used for reading. If used, the seek position of the file will not
1644 to be used for reading. If used, the seek position of the file will not
1645 be preserved.
1645 be preserved.
1646
1646
1647 Returns a str holding uncompressed data for the requested revision.
1647 Returns a str holding uncompressed data for the requested revision.
1648 """
1648 """
1649 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1649 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1650
1650
1651 def _chunks(self, revs, df=None, targetsize=None):
1651 def _chunks(self, revs, df=None, targetsize=None):
1652 """Obtain decompressed chunks for the specified revisions.
1652 """Obtain decompressed chunks for the specified revisions.
1653
1653
1654 Accepts an iterable of numeric revisions that are assumed to be in
1654 Accepts an iterable of numeric revisions that are assumed to be in
1655 ascending order. Also accepts an optional already-open file handle
1655 ascending order. Also accepts an optional already-open file handle
1656 to be used for reading. If used, the seek position of the file will
1656 to be used for reading. If used, the seek position of the file will
1657 not be preserved.
1657 not be preserved.
1658
1658
1659 This function is similar to calling ``self._chunk()`` multiple times,
1659 This function is similar to calling ``self._chunk()`` multiple times,
1660 but is faster.
1660 but is faster.
1661
1661
1662 Returns a list with decompressed data for each requested revision.
1662 Returns a list with decompressed data for each requested revision.
1663 """
1663 """
1664 if not revs:
1664 if not revs:
1665 return []
1665 return []
1666 start = self.start
1666 start = self.start
1667 length = self.length
1667 length = self.length
1668 inline = self._inline
1668 inline = self._inline
1669 iosize = self.index.entry_size
1669 iosize = self.index.entry_size
1670 buffer = util.buffer
1670 buffer = util.buffer
1671
1671
1672 l = []
1672 l = []
1673 ladd = l.append
1673 ladd = l.append
1674
1674
1675 if not self._withsparseread:
1675 if not self._withsparseread:
1676 slicedchunks = (revs,)
1676 slicedchunks = (revs,)
1677 else:
1677 else:
1678 slicedchunks = deltautil.slicechunk(
1678 slicedchunks = deltautil.slicechunk(
1679 self, revs, targetsize=targetsize
1679 self, revs, targetsize=targetsize
1680 )
1680 )
1681
1681
1682 for revschunk in slicedchunks:
1682 for revschunk in slicedchunks:
1683 firstrev = revschunk[0]
1683 firstrev = revschunk[0]
1684 # Skip trailing revisions with empty diff
1684 # Skip trailing revisions with empty diff
1685 for lastrev in revschunk[::-1]:
1685 for lastrev in revschunk[::-1]:
1686 if length(lastrev) != 0:
1686 if length(lastrev) != 0:
1687 break
1687 break
1688
1688
1689 try:
1689 try:
1690 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1690 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1691 except OverflowError:
1691 except OverflowError:
1692 # issue4215 - we can't cache a run of chunks greater than
1692 # issue4215 - we can't cache a run of chunks greater than
1693 # 2G on Windows
1693 # 2G on Windows
1694 return [self._chunk(rev, df=df) for rev in revschunk]
1694 return [self._chunk(rev, df=df) for rev in revschunk]
1695
1695
1696 decomp = self.decompress
1696 decomp = self.decompress
1697 for rev in revschunk:
1697 for rev in revschunk:
1698 chunkstart = start(rev)
1698 chunkstart = start(rev)
1699 if inline:
1699 if inline:
1700 chunkstart += (rev + 1) * iosize
1700 chunkstart += (rev + 1) * iosize
1701 chunklength = length(rev)
1701 chunklength = length(rev)
1702 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1702 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1703
1703
1704 return l
1704 return l
1705
1705
1706 def _chunkclear(self):
1706 def _chunkclear(self):
1707 """Clear the raw chunk cache."""
1707 """Clear the raw chunk cache."""
1708 self._chunkcache = (0, b'')
1708 self._chunkcache = (0, b'')
1709
1709
1710 def deltaparent(self, rev):
1710 def deltaparent(self, rev):
1711 """return deltaparent of the given revision"""
1711 """return deltaparent of the given revision"""
1712 base = self.index[rev][3]
1712 base = self.index[rev][3]
1713 if base == rev:
1713 if base == rev:
1714 return nullrev
1714 return nullrev
1715 elif self._generaldelta:
1715 elif self._generaldelta:
1716 return base
1716 return base
1717 else:
1717 else:
1718 return rev - 1
1718 return rev - 1
1719
1719
1720 def issnapshot(self, rev):
1720 def issnapshot(self, rev):
1721 """tells whether rev is a snapshot"""
1721 """tells whether rev is a snapshot"""
1722 if not self._sparserevlog:
1722 if not self._sparserevlog:
1723 return self.deltaparent(rev) == nullrev
1723 return self.deltaparent(rev) == nullrev
1724 elif util.safehasattr(self.index, b'issnapshot'):
1724 elif util.safehasattr(self.index, b'issnapshot'):
1725 # directly assign the method to cache the testing and access
1725 # directly assign the method to cache the testing and access
1726 self.issnapshot = self.index.issnapshot
1726 self.issnapshot = self.index.issnapshot
1727 return self.issnapshot(rev)
1727 return self.issnapshot(rev)
1728 if rev == nullrev:
1728 if rev == nullrev:
1729 return True
1729 return True
1730 entry = self.index[rev]
1730 entry = self.index[rev]
1731 base = entry[3]
1731 base = entry[3]
1732 if base == rev:
1732 if base == rev:
1733 return True
1733 return True
1734 if base == nullrev:
1734 if base == nullrev:
1735 return True
1735 return True
1736 p1 = entry[5]
1736 p1 = entry[5]
1737 p2 = entry[6]
1737 p2 = entry[6]
1738 if base == p1 or base == p2:
1738 if base == p1 or base == p2:
1739 return False
1739 return False
1740 return self.issnapshot(base)
1740 return self.issnapshot(base)
1741
1741
1742 def snapshotdepth(self, rev):
1742 def snapshotdepth(self, rev):
1743 """number of snapshot in the chain before this one"""
1743 """number of snapshot in the chain before this one"""
1744 if not self.issnapshot(rev):
1744 if not self.issnapshot(rev):
1745 raise error.ProgrammingError(b'revision %d not a snapshot')
1745 raise error.ProgrammingError(b'revision %d not a snapshot')
1746 return len(self._deltachain(rev)[0]) - 1
1746 return len(self._deltachain(rev)[0]) - 1
1747
1747
1748 def revdiff(self, rev1, rev2):
1748 def revdiff(self, rev1, rev2):
1749 """return or calculate a delta between two revisions
1749 """return or calculate a delta between two revisions
1750
1750
1751 The delta calculated is in binary form and is intended to be written to
1751 The delta calculated is in binary form and is intended to be written to
1752 revlog data directly. So this function needs raw revision data.
1752 revlog data directly. So this function needs raw revision data.
1753 """
1753 """
1754 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1754 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1755 return bytes(self._chunk(rev2))
1755 return bytes(self._chunk(rev2))
1756
1756
1757 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1757 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1758
1758
1759 def _processflags(self, text, flags, operation, raw=False):
1759 def _processflags(self, text, flags, operation, raw=False):
1760 """deprecated entry point to access flag processors"""
1760 """deprecated entry point to access flag processors"""
1761 msg = b'_processflag(...) use the specialized variant'
1761 msg = b'_processflag(...) use the specialized variant'
1762 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1762 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1763 if raw:
1763 if raw:
1764 return text, flagutil.processflagsraw(self, text, flags)
1764 return text, flagutil.processflagsraw(self, text, flags)
1765 elif operation == b'read':
1765 elif operation == b'read':
1766 return flagutil.processflagsread(self, text, flags)
1766 return flagutil.processflagsread(self, text, flags)
1767 else: # write operation
1767 else: # write operation
1768 return flagutil.processflagswrite(self, text, flags)
1768 return flagutil.processflagswrite(self, text, flags)
1769
1769
1770 def revision(self, nodeorrev, _df=None, raw=False):
1770 def revision(self, nodeorrev, _df=None, raw=False):
1771 """return an uncompressed revision of a given node or revision
1771 """return an uncompressed revision of a given node or revision
1772 number.
1772 number.
1773
1773
1774 _df - an existing file handle to read from. (internal-only)
1774 _df - an existing file handle to read from. (internal-only)
1775 raw - an optional argument specifying if the revision data is to be
1775 raw - an optional argument specifying if the revision data is to be
1776 treated as raw data when applying flag transforms. 'raw' should be set
1776 treated as raw data when applying flag transforms. 'raw' should be set
1777 to True when generating changegroups or in debug commands.
1777 to True when generating changegroups or in debug commands.
1778 """
1778 """
1779 if raw:
1779 if raw:
1780 msg = (
1780 msg = (
1781 b'revlog.revision(..., raw=True) is deprecated, '
1781 b'revlog.revision(..., raw=True) is deprecated, '
1782 b'use revlog.rawdata(...)'
1782 b'use revlog.rawdata(...)'
1783 )
1783 )
1784 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1784 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1785 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1785 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1786
1786
1787 def sidedata(self, nodeorrev, _df=None):
1787 def sidedata(self, nodeorrev, _df=None):
1788 """a map of extra data related to the changeset but not part of the hash
1788 """a map of extra data related to the changeset but not part of the hash
1789
1789
1790 This function currently return a dictionary. However, more advanced
1790 This function currently return a dictionary. However, more advanced
1791 mapping object will likely be used in the future for a more
1791 mapping object will likely be used in the future for a more
1792 efficient/lazy code.
1792 efficient/lazy code.
1793 """
1793 """
1794 return self._revisiondata(nodeorrev, _df)[1]
1794 return self._revisiondata(nodeorrev, _df)[1]
1795
1795
1796 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1796 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1797 # deal with <nodeorrev> argument type
1797 # deal with <nodeorrev> argument type
1798 if isinstance(nodeorrev, int):
1798 if isinstance(nodeorrev, int):
1799 rev = nodeorrev
1799 rev = nodeorrev
1800 node = self.node(rev)
1800 node = self.node(rev)
1801 else:
1801 else:
1802 node = nodeorrev
1802 node = nodeorrev
1803 rev = None
1803 rev = None
1804
1804
1805 # fast path the special `nullid` rev
1805 # fast path the special `nullid` rev
1806 if node == self.nullid:
1806 if node == self.nullid:
1807 return b"", {}
1807 return b"", {}
1808
1808
1809 # ``rawtext`` is the text as stored inside the revlog. Might be the
1809 # ``rawtext`` is the text as stored inside the revlog. Might be the
1810 # revision or might need to be processed to retrieve the revision.
1810 # revision or might need to be processed to retrieve the revision.
1811 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1811 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1812
1812
1813 if self.hassidedata:
1813 if self.hassidedata:
1814 if rev is None:
1814 if rev is None:
1815 rev = self.rev(node)
1815 rev = self.rev(node)
1816 sidedata = self._sidedata(rev)
1816 sidedata = self._sidedata(rev)
1817 else:
1817 else:
1818 sidedata = {}
1818 sidedata = {}
1819
1819
1820 if raw and validated:
1820 if raw and validated:
1821 # if we don't want to process the raw text and that raw
1821 # if we don't want to process the raw text and that raw
1822 # text is cached, we can exit early.
1822 # text is cached, we can exit early.
1823 return rawtext, sidedata
1823 return rawtext, sidedata
1824 if rev is None:
1824 if rev is None:
1825 rev = self.rev(node)
1825 rev = self.rev(node)
1826 # the revlog's flag for this revision
1826 # the revlog's flag for this revision
1827 # (usually alter its state or content)
1827 # (usually alter its state or content)
1828 flags = self.flags(rev)
1828 flags = self.flags(rev)
1829
1829
1830 if validated and flags == REVIDX_DEFAULT_FLAGS:
1830 if validated and flags == REVIDX_DEFAULT_FLAGS:
1831 # no extra flags set, no flag processor runs, text = rawtext
1831 # no extra flags set, no flag processor runs, text = rawtext
1832 return rawtext, sidedata
1832 return rawtext, sidedata
1833
1833
1834 if raw:
1834 if raw:
1835 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1835 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1836 text = rawtext
1836 text = rawtext
1837 else:
1837 else:
1838 r = flagutil.processflagsread(self, rawtext, flags)
1838 r = flagutil.processflagsread(self, rawtext, flags)
1839 text, validatehash = r
1839 text, validatehash = r
1840 if validatehash:
1840 if validatehash:
1841 self.checkhash(text, node, rev=rev)
1841 self.checkhash(text, node, rev=rev)
1842 if not validated:
1842 if not validated:
1843 self._revisioncache = (node, rev, rawtext)
1843 self._revisioncache = (node, rev, rawtext)
1844
1844
1845 return text, sidedata
1845 return text, sidedata
1846
1846
1847 def _rawtext(self, node, rev, _df=None):
1847 def _rawtext(self, node, rev, _df=None):
1848 """return the possibly unvalidated rawtext for a revision
1848 """return the possibly unvalidated rawtext for a revision
1849
1849
1850 returns (rev, rawtext, validated)
1850 returns (rev, rawtext, validated)
1851 """
1851 """
1852
1852
1853 # revision in the cache (could be useful to apply delta)
1853 # revision in the cache (could be useful to apply delta)
1854 cachedrev = None
1854 cachedrev = None
1855 # An intermediate text to apply deltas to
1855 # An intermediate text to apply deltas to
1856 basetext = None
1856 basetext = None
1857
1857
1858 # Check if we have the entry in cache
1858 # Check if we have the entry in cache
1859 # The cache entry looks like (node, rev, rawtext)
1859 # The cache entry looks like (node, rev, rawtext)
1860 if self._revisioncache:
1860 if self._revisioncache:
1861 if self._revisioncache[0] == node:
1861 if self._revisioncache[0] == node:
1862 return (rev, self._revisioncache[2], True)
1862 return (rev, self._revisioncache[2], True)
1863 cachedrev = self._revisioncache[1]
1863 cachedrev = self._revisioncache[1]
1864
1864
1865 if rev is None:
1865 if rev is None:
1866 rev = self.rev(node)
1866 rev = self.rev(node)
1867
1867
1868 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1868 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1869 if stopped:
1869 if stopped:
1870 basetext = self._revisioncache[2]
1870 basetext = self._revisioncache[2]
1871
1871
1872 # drop cache to save memory, the caller is expected to
1872 # drop cache to save memory, the caller is expected to
1873 # update self._revisioncache after validating the text
1873 # update self._revisioncache after validating the text
1874 self._revisioncache = None
1874 self._revisioncache = None
1875
1875
1876 targetsize = None
1876 targetsize = None
1877 rawsize = self.index[rev][2]
1877 rawsize = self.index[rev][2]
1878 if 0 <= rawsize:
1878 if 0 <= rawsize:
1879 targetsize = 4 * rawsize
1879 targetsize = 4 * rawsize
1880
1880
1881 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1881 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1882 if basetext is None:
1882 if basetext is None:
1883 basetext = bytes(bins[0])
1883 basetext = bytes(bins[0])
1884 bins = bins[1:]
1884 bins = bins[1:]
1885
1885
1886 rawtext = mdiff.patches(basetext, bins)
1886 rawtext = mdiff.patches(basetext, bins)
1887 del basetext # let us have a chance to free memory early
1887 del basetext # let us have a chance to free memory early
1888 return (rev, rawtext, False)
1888 return (rev, rawtext, False)
1889
1889
1890 def _sidedata(self, rev):
1890 def _sidedata(self, rev):
1891 """Return the sidedata for a given revision number."""
1891 """Return the sidedata for a given revision number."""
1892 index_entry = self.index[rev]
1892 index_entry = self.index[rev]
1893 sidedata_offset = index_entry[8]
1893 sidedata_offset = index_entry[8]
1894 sidedata_size = index_entry[9]
1894 sidedata_size = index_entry[9]
1895
1895
1896 if self._inline:
1896 if self._inline:
1897 sidedata_offset += self.index.entry_size * (1 + rev)
1897 sidedata_offset += self.index.entry_size * (1 + rev)
1898 if sidedata_size == 0:
1898 if sidedata_size == 0:
1899 return {}
1899 return {}
1900
1900
1901 segment = self._getsegment(sidedata_offset, sidedata_size)
1901 segment = self._getsegment(sidedata_offset, sidedata_size)
1902 sidedata = sidedatautil.deserialize_sidedata(segment)
1902 sidedata = sidedatautil.deserialize_sidedata(segment)
1903 return sidedata
1903 return sidedata
1904
1904
1905 def rawdata(self, nodeorrev, _df=None):
1905 def rawdata(self, nodeorrev, _df=None):
1906 """return an uncompressed raw data of a given node or revision number.
1906 """return an uncompressed raw data of a given node or revision number.
1907
1907
1908 _df - an existing file handle to read from. (internal-only)
1908 _df - an existing file handle to read from. (internal-only)
1909 """
1909 """
1910 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1910 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1911
1911
1912 def hash(self, text, p1, p2):
1912 def hash(self, text, p1, p2):
1913 """Compute a node hash.
1913 """Compute a node hash.
1914
1914
1915 Available as a function so that subclasses can replace the hash
1915 Available as a function so that subclasses can replace the hash
1916 as needed.
1916 as needed.
1917 """
1917 """
1918 return storageutil.hashrevisionsha1(text, p1, p2)
1918 return storageutil.hashrevisionsha1(text, p1, p2)
1919
1919
1920 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1920 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1921 """Check node hash integrity.
1921 """Check node hash integrity.
1922
1922
1923 Available as a function so that subclasses can extend hash mismatch
1923 Available as a function so that subclasses can extend hash mismatch
1924 behaviors as needed.
1924 behaviors as needed.
1925 """
1925 """
1926 try:
1926 try:
1927 if p1 is None and p2 is None:
1927 if p1 is None and p2 is None:
1928 p1, p2 = self.parents(node)
1928 p1, p2 = self.parents(node)
1929 if node != self.hash(text, p1, p2):
1929 if node != self.hash(text, p1, p2):
1930 # Clear the revision cache on hash failure. The revision cache
1930 # Clear the revision cache on hash failure. The revision cache
1931 # only stores the raw revision and clearing the cache does have
1931 # only stores the raw revision and clearing the cache does have
1932 # the side-effect that we won't have a cache hit when the raw
1932 # the side-effect that we won't have a cache hit when the raw
1933 # revision data is accessed. But this case should be rare and
1933 # revision data is accessed. But this case should be rare and
1934 # it is extra work to teach the cache about the hash
1934 # it is extra work to teach the cache about the hash
1935 # verification state.
1935 # verification state.
1936 if self._revisioncache and self._revisioncache[0] == node:
1936 if self._revisioncache and self._revisioncache[0] == node:
1937 self._revisioncache = None
1937 self._revisioncache = None
1938
1938
1939 revornode = rev
1939 revornode = rev
1940 if revornode is None:
1940 if revornode is None:
1941 revornode = templatefilters.short(hex(node))
1941 revornode = templatefilters.short(hex(node))
1942 raise error.RevlogError(
1942 raise error.RevlogError(
1943 _(b"integrity check failed on %s:%s")
1943 _(b"integrity check failed on %s:%s")
1944 % (self.display_id, pycompat.bytestr(revornode))
1944 % (self.display_id, pycompat.bytestr(revornode))
1945 )
1945 )
1946 except error.RevlogError:
1946 except error.RevlogError:
1947 if self._censorable and storageutil.iscensoredtext(text):
1947 if self._censorable and storageutil.iscensoredtext(text):
1948 raise error.CensoredNodeError(self.display_id, node, text)
1948 raise error.CensoredNodeError(self.display_id, node, text)
1949 raise
1949 raise
1950
1950
1951 def _enforceinlinesize(self, tr, fp=None):
1951 def _enforceinlinesize(self, tr, fp=None):
1952 """Check if the revlog is too big for inline and convert if so.
1952 """Check if the revlog is too big for inline and convert if so.
1953
1953
1954 This should be called after revisions are added to the revlog. If the
1954 This should be called after revisions are added to the revlog. If the
1955 revlog has grown too large to be an inline revlog, it will convert it
1955 revlog has grown too large to be an inline revlog, it will convert it
1956 to use multiple index and data files.
1956 to use multiple index and data files.
1957 """
1957 """
1958 tiprev = len(self) - 1
1958 tiprev = len(self) - 1
1959 if (
1959 if (
1960 not self._inline
1960 not self._inline
1961 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1961 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1962 ):
1962 ):
1963 return
1963 return
1964
1964
1965 troffset = tr.findoffset(self._indexfile)
1965 troffset = tr.findoffset(self._indexfile)
1966 if troffset is None:
1966 if troffset is None:
1967 raise error.RevlogError(
1967 raise error.RevlogError(
1968 _(b"%s not found in the transaction") % self._indexfile
1968 _(b"%s not found in the transaction") % self._indexfile
1969 )
1969 )
1970 trindex = 0
1970 trindex = 0
1971 tr.add(self._datafile, 0)
1971 tr.add(self._datafile, 0)
1972
1972
1973 if fp:
1973 if fp:
1974 fp.flush()
1974 fp.flush()
1975 fp.close()
1975 fp.close()
1976 # We can't use the cached file handle after close(). So prevent
1976 # We can't use the cached file handle after close(). So prevent
1977 # its usage.
1977 # its usage.
1978 self._writinghandles = None
1978 self._writinghandles = None
1979
1979
1980 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1980 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1981 for r in self:
1981 for r in self:
1982 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1982 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1983 if troffset <= self.start(r):
1983 if troffset <= self.start(r):
1984 trindex = r
1984 trindex = r
1985
1985
1986 with self._indexfp(b'w') as fp:
1986 with self._indexfp(b'w') as fp:
1987 self._format_flags &= ~FLAG_INLINE_DATA
1987 self._format_flags &= ~FLAG_INLINE_DATA
1988 self._inline = False
1988 self._inline = False
1989 for i in self:
1989 for i in self:
1990 e = self.index.entry_binary(i)
1990 e = self.index.entry_binary(i)
1991 if i == 0:
1991 if i == 0:
1992 header = self._format_flags | self._format_version
1992 header = self._format_flags | self._format_version
1993 header = self.index.pack_header(header)
1993 header = self.index.pack_header(header)
1994 e = header + e
1994 e = header + e
1995 fp.write(e)
1995 fp.write(e)
1996
1996
1997 # the temp file replace the real index when we exit the context
1997 # the temp file replace the real index when we exit the context
1998 # manager
1998 # manager
1999
1999
2000 tr.replace(self._indexfile, trindex * self.index.entry_size)
2000 tr.replace(self._indexfile, trindex * self.index.entry_size)
2001 nodemaputil.setup_persistent_nodemap(tr, self)
2001 nodemaputil.setup_persistent_nodemap(tr, self)
2002 self._chunkclear()
2002 self._chunkclear()
2003
2003
2004 def _nodeduplicatecallback(self, transaction, node):
2004 def _nodeduplicatecallback(self, transaction, node):
2005 """called when trying to add a node already stored."""
2005 """called when trying to add a node already stored."""
2006
2006
2007 def addrevision(
2007 def addrevision(
2008 self,
2008 self,
2009 text,
2009 text,
2010 transaction,
2010 transaction,
2011 link,
2011 link,
2012 p1,
2012 p1,
2013 p2,
2013 p2,
2014 cachedelta=None,
2014 cachedelta=None,
2015 node=None,
2015 node=None,
2016 flags=REVIDX_DEFAULT_FLAGS,
2016 flags=REVIDX_DEFAULT_FLAGS,
2017 deltacomputer=None,
2017 deltacomputer=None,
2018 sidedata=None,
2018 sidedata=None,
2019 ):
2019 ):
2020 """add a revision to the log
2020 """add a revision to the log
2021
2021
2022 text - the revision data to add
2022 text - the revision data to add
2023 transaction - the transaction object used for rollback
2023 transaction - the transaction object used for rollback
2024 link - the linkrev data to add
2024 link - the linkrev data to add
2025 p1, p2 - the parent nodeids of the revision
2025 p1, p2 - the parent nodeids of the revision
2026 cachedelta - an optional precomputed delta
2026 cachedelta - an optional precomputed delta
2027 node - nodeid of revision; typically node is not specified, and it is
2027 node - nodeid of revision; typically node is not specified, and it is
2028 computed by default as hash(text, p1, p2), however subclasses might
2028 computed by default as hash(text, p1, p2), however subclasses might
2029 use different hashing method (and override checkhash() in such case)
2029 use different hashing method (and override checkhash() in such case)
2030 flags - the known flags to set on the revision
2030 flags - the known flags to set on the revision
2031 deltacomputer - an optional deltacomputer instance shared between
2031 deltacomputer - an optional deltacomputer instance shared between
2032 multiple calls
2032 multiple calls
2033 """
2033 """
2034 if link == nullrev:
2034 if link == nullrev:
2035 raise error.RevlogError(
2035 raise error.RevlogError(
2036 _(b"attempted to add linkrev -1 to %s") % self.display_id
2036 _(b"attempted to add linkrev -1 to %s") % self.display_id
2037 )
2037 )
2038
2038
2039 if sidedata is None:
2039 if sidedata is None:
2040 sidedata = {}
2040 sidedata = {}
2041 elif sidedata and not self.hassidedata:
2041 elif sidedata and not self.hassidedata:
2042 raise error.ProgrammingError(
2042 raise error.ProgrammingError(
2043 _(b"trying to add sidedata to a revlog who don't support them")
2043 _(b"trying to add sidedata to a revlog who don't support them")
2044 )
2044 )
2045
2045
2046 if flags:
2046 if flags:
2047 node = node or self.hash(text, p1, p2)
2047 node = node or self.hash(text, p1, p2)
2048
2048
2049 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2049 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2050
2050
2051 # If the flag processor modifies the revision data, ignore any provided
2051 # If the flag processor modifies the revision data, ignore any provided
2052 # cachedelta.
2052 # cachedelta.
2053 if rawtext != text:
2053 if rawtext != text:
2054 cachedelta = None
2054 cachedelta = None
2055
2055
2056 if len(rawtext) > _maxentrysize:
2056 if len(rawtext) > _maxentrysize:
2057 raise error.RevlogError(
2057 raise error.RevlogError(
2058 _(
2058 _(
2059 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2059 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2060 )
2060 )
2061 % (self.display_id, len(rawtext))
2061 % (self.display_id, len(rawtext))
2062 )
2062 )
2063
2063
2064 node = node or self.hash(rawtext, p1, p2)
2064 node = node or self.hash(rawtext, p1, p2)
2065 rev = self.index.get_rev(node)
2065 rev = self.index.get_rev(node)
2066 if rev is not None:
2066 if rev is not None:
2067 return rev
2067 return rev
2068
2068
2069 if validatehash:
2069 if validatehash:
2070 self.checkhash(rawtext, node, p1=p1, p2=p2)
2070 self.checkhash(rawtext, node, p1=p1, p2=p2)
2071
2071
2072 return self.addrawrevision(
2072 return self.addrawrevision(
2073 rawtext,
2073 rawtext,
2074 transaction,
2074 transaction,
2075 link,
2075 link,
2076 p1,
2076 p1,
2077 p2,
2077 p2,
2078 node,
2078 node,
2079 flags,
2079 flags,
2080 cachedelta=cachedelta,
2080 cachedelta=cachedelta,
2081 deltacomputer=deltacomputer,
2081 deltacomputer=deltacomputer,
2082 sidedata=sidedata,
2082 sidedata=sidedata,
2083 )
2083 )
2084
2084
2085 def addrawrevision(
2085 def addrawrevision(
2086 self,
2086 self,
2087 rawtext,
2087 rawtext,
2088 transaction,
2088 transaction,
2089 link,
2089 link,
2090 p1,
2090 p1,
2091 p2,
2091 p2,
2092 node,
2092 node,
2093 flags,
2093 flags,
2094 cachedelta=None,
2094 cachedelta=None,
2095 deltacomputer=None,
2095 deltacomputer=None,
2096 sidedata=None,
2096 sidedata=None,
2097 ):
2097 ):
2098 """add a raw revision with known flags, node and parents
2098 """add a raw revision with known flags, node and parents
2099 useful when reusing a revision not stored in this revlog (ex: received
2099 useful when reusing a revision not stored in this revlog (ex: received
2100 over wire, or read from an external bundle).
2100 over wire, or read from an external bundle).
2101 """
2101 """
2102 dfh = None
2102 dfh = None
2103 if not self._inline:
2103 if not self._inline:
2104 dfh = self._datafp(b"a+")
2104 dfh = self._datafp(b"a+")
2105 ifh = self._indexfp(b"a+")
2105 ifh = self._indexfp(b"a+")
2106 try:
2106 try:
2107 return self._addrevision(
2107 return self._addrevision(
2108 node,
2108 node,
2109 rawtext,
2109 rawtext,
2110 transaction,
2110 transaction,
2111 link,
2111 link,
2112 p1,
2112 p1,
2113 p2,
2113 p2,
2114 flags,
2114 flags,
2115 cachedelta,
2115 cachedelta,
2116 ifh,
2116 ifh,
2117 dfh,
2117 dfh,
2118 deltacomputer=deltacomputer,
2118 deltacomputer=deltacomputer,
2119 sidedata=sidedata,
2119 sidedata=sidedata,
2120 )
2120 )
2121 finally:
2121 finally:
2122 if dfh:
2122 if dfh:
2123 dfh.close()
2123 dfh.close()
2124 ifh.close()
2124 ifh.close()
2125
2125
2126 def compress(self, data):
2126 def compress(self, data):
2127 """Generate a possibly-compressed representation of data."""
2127 """Generate a possibly-compressed representation of data."""
2128 if not data:
2128 if not data:
2129 return b'', data
2129 return b'', data
2130
2130
2131 compressed = self._compressor.compress(data)
2131 compressed = self._compressor.compress(data)
2132
2132
2133 if compressed:
2133 if compressed:
2134 # The revlog compressor added the header in the returned data.
2134 # The revlog compressor added the header in the returned data.
2135 return b'', compressed
2135 return b'', compressed
2136
2136
2137 if data[0:1] == b'\0':
2137 if data[0:1] == b'\0':
2138 return b'', data
2138 return b'', data
2139 return b'u', data
2139 return b'u', data
2140
2140
2141 def decompress(self, data):
2141 def decompress(self, data):
2142 """Decompress a revlog chunk.
2142 """Decompress a revlog chunk.
2143
2143
2144 The chunk is expected to begin with a header identifying the
2144 The chunk is expected to begin with a header identifying the
2145 format type so it can be routed to an appropriate decompressor.
2145 format type so it can be routed to an appropriate decompressor.
2146 """
2146 """
2147 if not data:
2147 if not data:
2148 return data
2148 return data
2149
2149
2150 # Revlogs are read much more frequently than they are written and many
2150 # Revlogs are read much more frequently than they are written and many
2151 # chunks only take microseconds to decompress, so performance is
2151 # chunks only take microseconds to decompress, so performance is
2152 # important here.
2152 # important here.
2153 #
2153 #
2154 # We can make a few assumptions about revlogs:
2154 # We can make a few assumptions about revlogs:
2155 #
2155 #
2156 # 1) the majority of chunks will be compressed (as opposed to inline
2156 # 1) the majority of chunks will be compressed (as opposed to inline
2157 # raw data).
2157 # raw data).
2158 # 2) decompressing *any* data will likely by at least 10x slower than
2158 # 2) decompressing *any* data will likely by at least 10x slower than
2159 # returning raw inline data.
2159 # returning raw inline data.
2160 # 3) we want to prioritize common and officially supported compression
2160 # 3) we want to prioritize common and officially supported compression
2161 # engines
2161 # engines
2162 #
2162 #
2163 # It follows that we want to optimize for "decompress compressed data
2163 # It follows that we want to optimize for "decompress compressed data
2164 # when encoded with common and officially supported compression engines"
2164 # when encoded with common and officially supported compression engines"
2165 # case over "raw data" and "data encoded by less common or non-official
2165 # case over "raw data" and "data encoded by less common or non-official
2166 # compression engines." That is why we have the inline lookup first
2166 # compression engines." That is why we have the inline lookup first
2167 # followed by the compengines lookup.
2167 # followed by the compengines lookup.
2168 #
2168 #
2169 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2169 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2170 # compressed chunks. And this matters for changelog and manifest reads.
2170 # compressed chunks. And this matters for changelog and manifest reads.
2171 t = data[0:1]
2171 t = data[0:1]
2172
2172
2173 if t == b'x':
2173 if t == b'x':
2174 try:
2174 try:
2175 return _zlibdecompress(data)
2175 return _zlibdecompress(data)
2176 except zlib.error as e:
2176 except zlib.error as e:
2177 raise error.RevlogError(
2177 raise error.RevlogError(
2178 _(b'revlog decompress error: %s')
2178 _(b'revlog decompress error: %s')
2179 % stringutil.forcebytestr(e)
2179 % stringutil.forcebytestr(e)
2180 )
2180 )
2181 # '\0' is more common than 'u' so it goes first.
2181 # '\0' is more common than 'u' so it goes first.
2182 elif t == b'\0':
2182 elif t == b'\0':
2183 return data
2183 return data
2184 elif t == b'u':
2184 elif t == b'u':
2185 return util.buffer(data, 1)
2185 return util.buffer(data, 1)
2186
2186
2187 try:
2187 try:
2188 compressor = self._decompressors[t]
2188 compressor = self._decompressors[t]
2189 except KeyError:
2189 except KeyError:
2190 try:
2190 try:
2191 engine = util.compengines.forrevlogheader(t)
2191 engine = util.compengines.forrevlogheader(t)
2192 compressor = engine.revlogcompressor(self._compengineopts)
2192 compressor = engine.revlogcompressor(self._compengineopts)
2193 self._decompressors[t] = compressor
2193 self._decompressors[t] = compressor
2194 except KeyError:
2194 except KeyError:
2195 raise error.RevlogError(
2195 raise error.RevlogError(
2196 _(b'unknown compression type %s') % binascii.hexlify(t)
2196 _(b'unknown compression type %s') % binascii.hexlify(t)
2197 )
2197 )
2198
2198
2199 return compressor.decompress(data)
2199 return compressor.decompress(data)
2200
2200
2201 def _addrevision(
2201 def _addrevision(
2202 self,
2202 self,
2203 node,
2203 node,
2204 rawtext,
2204 rawtext,
2205 transaction,
2205 transaction,
2206 link,
2206 link,
2207 p1,
2207 p1,
2208 p2,
2208 p2,
2209 flags,
2209 flags,
2210 cachedelta,
2210 cachedelta,
2211 ifh,
2211 ifh,
2212 dfh,
2212 dfh,
2213 alwayscache=False,
2213 alwayscache=False,
2214 deltacomputer=None,
2214 deltacomputer=None,
2215 sidedata=None,
2215 sidedata=None,
2216 ):
2216 ):
2217 """internal function to add revisions to the log
2217 """internal function to add revisions to the log
2218
2218
2219 see addrevision for argument descriptions.
2219 see addrevision for argument descriptions.
2220
2220
2221 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2221 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2222
2222
2223 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2223 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2224 be used.
2224 be used.
2225
2225
2226 invariants:
2226 invariants:
2227 - rawtext is optional (can be None); if not set, cachedelta must be set.
2227 - rawtext is optional (can be None); if not set, cachedelta must be set.
2228 if both are set, they must correspond to each other.
2228 if both are set, they must correspond to each other.
2229 """
2229 """
2230 if node == self.nullid:
2230 if node == self.nullid:
2231 raise error.RevlogError(
2231 raise error.RevlogError(
2232 _(b"%s: attempt to add null revision") % self.display_id
2232 _(b"%s: attempt to add null revision") % self.display_id
2233 )
2233 )
2234 if (
2234 if (
2235 node == self.nodeconstants.wdirid
2235 node == self.nodeconstants.wdirid
2236 or node in self.nodeconstants.wdirfilenodeids
2236 or node in self.nodeconstants.wdirfilenodeids
2237 ):
2237 ):
2238 raise error.RevlogError(
2238 raise error.RevlogError(
2239 _(b"%s: attempt to add wdir revision") % self.display_id
2239 _(b"%s: attempt to add wdir revision") % self.display_id
2240 )
2240 )
2241
2241
2242 if self._inline:
2242 if self._inline:
2243 fh = ifh
2243 fh = ifh
2244 else:
2244 else:
2245 fh = dfh
2245 fh = dfh
2246
2246
2247 btext = [rawtext]
2247 btext = [rawtext]
2248
2248
2249 curr = len(self)
2249 curr = len(self)
2250 prev = curr - 1
2250 prev = curr - 1
2251
2251
2252 offset = self._get_data_offset(prev)
2252 offset = self._get_data_offset(prev)
2253
2253
2254 if self._concurrencychecker:
2254 if self._concurrencychecker:
2255 if self._inline:
2255 if self._inline:
2256 # offset is "as if" it were in the .d file, so we need to add on
2256 # offset is "as if" it were in the .d file, so we need to add on
2257 # the size of the entry metadata.
2257 # the size of the entry metadata.
2258 self._concurrencychecker(
2258 self._concurrencychecker(
2259 ifh, self._indexfile, offset + curr * self.index.entry_size
2259 ifh, self._indexfile, offset + curr * self.index.entry_size
2260 )
2260 )
2261 else:
2261 else:
2262 # Entries in the .i are a consistent size.
2262 # Entries in the .i are a consistent size.
2263 self._concurrencychecker(
2263 self._concurrencychecker(
2264 ifh, self._indexfile, curr * self.index.entry_size
2264 ifh, self._indexfile, curr * self.index.entry_size
2265 )
2265 )
2266 self._concurrencychecker(dfh, self._datafile, offset)
2266 self._concurrencychecker(dfh, self._datafile, offset)
2267
2267
2268 p1r, p2r = self.rev(p1), self.rev(p2)
2268 p1r, p2r = self.rev(p1), self.rev(p2)
2269
2269
2270 # full versions are inserted when the needed deltas
2270 # full versions are inserted when the needed deltas
2271 # become comparable to the uncompressed text
2271 # become comparable to the uncompressed text
2272 if rawtext is None:
2272 if rawtext is None:
2273 # need rawtext size, before changed by flag processors, which is
2273 # need rawtext size, before changed by flag processors, which is
2274 # the non-raw size. use revlog explicitly to avoid filelog's extra
2274 # the non-raw size. use revlog explicitly to avoid filelog's extra
2275 # logic that might remove metadata size.
2275 # logic that might remove metadata size.
2276 textlen = mdiff.patchedsize(
2276 textlen = mdiff.patchedsize(
2277 revlog.size(self, cachedelta[0]), cachedelta[1]
2277 revlog.size(self, cachedelta[0]), cachedelta[1]
2278 )
2278 )
2279 else:
2279 else:
2280 textlen = len(rawtext)
2280 textlen = len(rawtext)
2281
2281
2282 if deltacomputer is None:
2282 if deltacomputer is None:
2283 deltacomputer = deltautil.deltacomputer(self)
2283 deltacomputer = deltautil.deltacomputer(self)
2284
2284
2285 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2285 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2286
2286
2287 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2287 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2288
2288
2289 if sidedata and self.hassidedata:
2289 if sidedata and self.hassidedata:
2290 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2290 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2291 sidedata_offset = offset + deltainfo.deltalen
2291 sidedata_offset = offset + deltainfo.deltalen
2292 else:
2292 else:
2293 serialized_sidedata = b""
2293 serialized_sidedata = b""
2294 # Don't store the offset if the sidedata is empty, that way
2294 # Don't store the offset if the sidedata is empty, that way
2295 # we can easily detect empty sidedata and they will be no different
2295 # we can easily detect empty sidedata and they will be no different
2296 # than ones we manually add.
2296 # than ones we manually add.
2297 sidedata_offset = 0
2297 sidedata_offset = 0
2298
2298
2299 e = (
2299 e = (
2300 offset_type(offset, flags),
2300 offset_type(offset, flags),
2301 deltainfo.deltalen,
2301 deltainfo.deltalen,
2302 textlen,
2302 textlen,
2303 deltainfo.base,
2303 deltainfo.base,
2304 link,
2304 link,
2305 p1r,
2305 p1r,
2306 p2r,
2306 p2r,
2307 node,
2307 node,
2308 sidedata_offset,
2308 sidedata_offset,
2309 len(serialized_sidedata),
2309 len(serialized_sidedata),
2310 )
2310 )
2311
2311
2312 self.index.append(e)
2312 self.index.append(e)
2313 entry = self.index.entry_binary(curr)
2313 entry = self.index.entry_binary(curr)
2314 if curr == 0:
2314 if curr == 0:
2315 header = self._format_flags | self._format_version
2315 header = self._format_flags | self._format_version
2316 header = self.index.pack_header(header)
2316 header = self.index.pack_header(header)
2317 entry = header + entry
2317 entry = header + entry
2318 self._writeentry(
2318 self._writeentry(
2319 transaction,
2319 transaction,
2320 ifh,
2320 ifh,
2321 dfh,
2321 dfh,
2322 entry,
2322 entry,
2323 deltainfo.data,
2323 deltainfo.data,
2324 link,
2324 link,
2325 offset,
2325 offset,
2326 serialized_sidedata,
2326 serialized_sidedata,
2327 )
2327 )
2328
2328
2329 rawtext = btext[0]
2329 rawtext = btext[0]
2330
2330
2331 if alwayscache and rawtext is None:
2331 if alwayscache and rawtext is None:
2332 rawtext = deltacomputer.buildtext(revinfo, fh)
2332 rawtext = deltacomputer.buildtext(revinfo, fh)
2333
2333
2334 if type(rawtext) == bytes: # only accept immutable objects
2334 if type(rawtext) == bytes: # only accept immutable objects
2335 self._revisioncache = (node, curr, rawtext)
2335 self._revisioncache = (node, curr, rawtext)
2336 self._chainbasecache[curr] = deltainfo.chainbase
2336 self._chainbasecache[curr] = deltainfo.chainbase
2337 return curr
2337 return curr
2338
2338
2339 def _get_data_offset(self, prev):
2339 def _get_data_offset(self, prev):
2340 """Returns the current offset in the (in-transaction) data file.
2340 """Returns the current offset in the (in-transaction) data file.
2341 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2341 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2342 file to store that information: since sidedata can be rewritten to the
2342 file to store that information: since sidedata can be rewritten to the
2343 end of the data file within a transaction, you can have cases where, for
2343 end of the data file within a transaction, you can have cases where, for
2344 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2344 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2345 to `n - 1`'s sidedata being written after `n`'s data.
2345 to `n - 1`'s sidedata being written after `n`'s data.
2346
2346
2347 TODO cache this in a docket file before getting out of experimental."""
2347 TODO cache this in a docket file before getting out of experimental."""
2348 if self._format_version != REVLOGV2:
2348 if self._format_version != REVLOGV2:
2349 return self.end(prev)
2349 return self.end(prev)
2350
2350
2351 offset = 0
2351 offset = 0
2352 for rev, entry in enumerate(self.index):
2352 for rev, entry in enumerate(self.index):
2353 sidedata_end = entry[8] + entry[9]
2353 sidedata_end = entry[8] + entry[9]
2354 # Sidedata for a previous rev has potentially been written after
2354 # Sidedata for a previous rev has potentially been written after
2355 # this rev's end, so take the max.
2355 # this rev's end, so take the max.
2356 offset = max(self.end(rev), offset, sidedata_end)
2356 offset = max(self.end(rev), offset, sidedata_end)
2357 return offset
2357 return offset
2358
2358
2359 def _writeentry(
2359 def _writeentry(
2360 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2360 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2361 ):
2361 ):
2362 # Files opened in a+ mode have inconsistent behavior on various
2362 # Files opened in a+ mode have inconsistent behavior on various
2363 # platforms. Windows requires that a file positioning call be made
2363 # platforms. Windows requires that a file positioning call be made
2364 # when the file handle transitions between reads and writes. See
2364 # when the file handle transitions between reads and writes. See
2365 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2365 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2366 # platforms, Python or the platform itself can be buggy. Some versions
2366 # platforms, Python or the platform itself can be buggy. Some versions
2367 # of Solaris have been observed to not append at the end of the file
2367 # of Solaris have been observed to not append at the end of the file
2368 # if the file was seeked to before the end. See issue4943 for more.
2368 # if the file was seeked to before the end. See issue4943 for more.
2369 #
2369 #
2370 # We work around this issue by inserting a seek() before writing.
2370 # We work around this issue by inserting a seek() before writing.
2371 # Note: This is likely not necessary on Python 3. However, because
2371 # Note: This is likely not necessary on Python 3. However, because
2372 # the file handle is reused for reads and may be seeked there, we need
2372 # the file handle is reused for reads and may be seeked there, we need
2373 # to be careful before changing this.
2373 # to be careful before changing this.
2374 ifh.seek(0, os.SEEK_END)
2374 ifh.seek(0, os.SEEK_END)
2375 if dfh:
2375 if dfh:
2376 dfh.seek(0, os.SEEK_END)
2376 dfh.seek(0, os.SEEK_END)
2377
2377
2378 curr = len(self) - 1
2378 curr = len(self) - 1
2379 if not self._inline:
2379 if not self._inline:
2380 transaction.add(self._datafile, offset)
2380 transaction.add(self._datafile, offset)
2381 transaction.add(self._indexfile, curr * len(entry))
2381 transaction.add(self._indexfile, curr * len(entry))
2382 if data[0]:
2382 if data[0]:
2383 dfh.write(data[0])
2383 dfh.write(data[0])
2384 dfh.write(data[1])
2384 dfh.write(data[1])
2385 if sidedata:
2385 if sidedata:
2386 dfh.write(sidedata)
2386 dfh.write(sidedata)
2387 ifh.write(entry)
2387 ifh.write(entry)
2388 else:
2388 else:
2389 offset += curr * self.index.entry_size
2389 offset += curr * self.index.entry_size
2390 transaction.add(self._indexfile, offset)
2390 transaction.add(self._indexfile, offset)
2391 ifh.write(entry)
2391 ifh.write(entry)
2392 ifh.write(data[0])
2392 ifh.write(data[0])
2393 ifh.write(data[1])
2393 ifh.write(data[1])
2394 if sidedata:
2394 if sidedata:
2395 ifh.write(sidedata)
2395 ifh.write(sidedata)
2396 self._enforceinlinesize(transaction, ifh)
2396 self._enforceinlinesize(transaction, ifh)
2397 nodemaputil.setup_persistent_nodemap(transaction, self)
2397 nodemaputil.setup_persistent_nodemap(transaction, self)
2398
2398
2399 def addgroup(
2399 def addgroup(
2400 self,
2400 self,
2401 deltas,
2401 deltas,
2402 linkmapper,
2402 linkmapper,
2403 transaction,
2403 transaction,
2404 alwayscache=False,
2404 alwayscache=False,
2405 addrevisioncb=None,
2405 addrevisioncb=None,
2406 duplicaterevisioncb=None,
2406 duplicaterevisioncb=None,
2407 ):
2407 ):
2408 """
2408 """
2409 add a delta group
2409 add a delta group
2410
2410
2411 given a set of deltas, add them to the revision log. the
2411 given a set of deltas, add them to the revision log. the
2412 first delta is against its parent, which should be in our
2412 first delta is against its parent, which should be in our
2413 log, the rest are against the previous delta.
2413 log, the rest are against the previous delta.
2414
2414
2415 If ``addrevisioncb`` is defined, it will be called with arguments of
2415 If ``addrevisioncb`` is defined, it will be called with arguments of
2416 this revlog and the node that was added.
2416 this revlog and the node that was added.
2417 """
2417 """
2418
2418
2419 if self._writinghandles:
2419 if self._writinghandles:
2420 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2420 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2421
2421
2422 r = len(self)
2422 r = len(self)
2423 end = 0
2423 end = 0
2424 if r:
2424 if r:
2425 end = self.end(r - 1)
2425 end = self.end(r - 1)
2426 ifh = self._indexfp(b"a+")
2426 ifh = self._indexfp(b"a+")
2427 isize = r * self.index.entry_size
2427 isize = r * self.index.entry_size
2428 if self._inline:
2428 if self._inline:
2429 transaction.add(self._indexfile, end + isize)
2429 transaction.add(self._indexfile, end + isize)
2430 dfh = None
2430 dfh = None
2431 else:
2431 else:
2432 transaction.add(self._indexfile, isize)
2432 transaction.add(self._indexfile, isize)
2433 transaction.add(self._datafile, end)
2433 transaction.add(self._datafile, end)
2434 dfh = self._datafp(b"a+")
2434 dfh = self._datafp(b"a+")
2435
2435
2436 def flush():
2436 def flush():
2437 if dfh:
2437 if dfh:
2438 dfh.flush()
2438 dfh.flush()
2439 ifh.flush()
2439 ifh.flush()
2440
2440
2441 self._writinghandles = (ifh, dfh)
2441 self._writinghandles = (ifh, dfh)
2442 empty = True
2442 empty = True
2443
2443
2444 try:
2444 try:
2445 deltacomputer = deltautil.deltacomputer(self)
2445 deltacomputer = deltautil.deltacomputer(self)
2446 # loop through our set of deltas
2446 # loop through our set of deltas
2447 for data in deltas:
2447 for data in deltas:
2448 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2448 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2449 link = linkmapper(linknode)
2449 link = linkmapper(linknode)
2450 flags = flags or REVIDX_DEFAULT_FLAGS
2450 flags = flags or REVIDX_DEFAULT_FLAGS
2451
2451
2452 rev = self.index.get_rev(node)
2452 rev = self.index.get_rev(node)
2453 if rev is not None:
2453 if rev is not None:
2454 # this can happen if two branches make the same change
2454 # this can happen if two branches make the same change
2455 self._nodeduplicatecallback(transaction, rev)
2455 self._nodeduplicatecallback(transaction, rev)
2456 if duplicaterevisioncb:
2456 if duplicaterevisioncb:
2457 duplicaterevisioncb(self, rev)
2457 duplicaterevisioncb(self, rev)
2458 empty = False
2458 empty = False
2459 continue
2459 continue
2460
2460
2461 for p in (p1, p2):
2461 for p in (p1, p2):
2462 if not self.index.has_node(p):
2462 if not self.index.has_node(p):
2463 raise error.LookupError(
2463 raise error.LookupError(
2464 p, self.radix, _(b'unknown parent')
2464 p, self.radix, _(b'unknown parent')
2465 )
2465 )
2466
2466
2467 if not self.index.has_node(deltabase):
2467 if not self.index.has_node(deltabase):
2468 raise error.LookupError(
2468 raise error.LookupError(
2469 deltabase, self.display_id, _(b'unknown delta base')
2469 deltabase, self.display_id, _(b'unknown delta base')
2470 )
2470 )
2471
2471
2472 baserev = self.rev(deltabase)
2472 baserev = self.rev(deltabase)
2473
2473
2474 if baserev != nullrev and self.iscensored(baserev):
2474 if baserev != nullrev and self.iscensored(baserev):
2475 # if base is censored, delta must be full replacement in a
2475 # if base is censored, delta must be full replacement in a
2476 # single patch operation
2476 # single patch operation
2477 hlen = struct.calcsize(b">lll")
2477 hlen = struct.calcsize(b">lll")
2478 oldlen = self.rawsize(baserev)
2478 oldlen = self.rawsize(baserev)
2479 newlen = len(delta) - hlen
2479 newlen = len(delta) - hlen
2480 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2480 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2481 raise error.CensoredBaseError(
2481 raise error.CensoredBaseError(
2482 self.display_id, self.node(baserev)
2482 self.display_id, self.node(baserev)
2483 )
2483 )
2484
2484
2485 if not flags and self._peek_iscensored(baserev, delta, flush):
2485 if not flags and self._peek_iscensored(baserev, delta, flush):
2486 flags |= REVIDX_ISCENSORED
2486 flags |= REVIDX_ISCENSORED
2487
2487
2488 # We assume consumers of addrevisioncb will want to retrieve
2488 # We assume consumers of addrevisioncb will want to retrieve
2489 # the added revision, which will require a call to
2489 # the added revision, which will require a call to
2490 # revision(). revision() will fast path if there is a cache
2490 # revision(). revision() will fast path if there is a cache
2491 # hit. So, we tell _addrevision() to always cache in this case.
2491 # hit. So, we tell _addrevision() to always cache in this case.
2492 # We're only using addgroup() in the context of changegroup
2492 # We're only using addgroup() in the context of changegroup
2493 # generation so the revision data can always be handled as raw
2493 # generation so the revision data can always be handled as raw
2494 # by the flagprocessor.
2494 # by the flagprocessor.
2495 rev = self._addrevision(
2495 rev = self._addrevision(
2496 node,
2496 node,
2497 None,
2497 None,
2498 transaction,
2498 transaction,
2499 link,
2499 link,
2500 p1,
2500 p1,
2501 p2,
2501 p2,
2502 flags,
2502 flags,
2503 (baserev, delta),
2503 (baserev, delta),
2504 ifh,
2504 ifh,
2505 dfh,
2505 dfh,
2506 alwayscache=alwayscache,
2506 alwayscache=alwayscache,
2507 deltacomputer=deltacomputer,
2507 deltacomputer=deltacomputer,
2508 sidedata=sidedata,
2508 sidedata=sidedata,
2509 )
2509 )
2510
2510
2511 if addrevisioncb:
2511 if addrevisioncb:
2512 addrevisioncb(self, rev)
2512 addrevisioncb(self, rev)
2513 empty = False
2513 empty = False
2514
2514
2515 if not dfh and not self._inline:
2515 if not dfh and not self._inline:
2516 # addrevision switched from inline to conventional
2516 # addrevision switched from inline to conventional
2517 # reopen the index
2517 # reopen the index
2518 ifh.close()
2518 ifh.close()
2519 dfh = self._datafp(b"a+")
2519 dfh = self._datafp(b"a+")
2520 ifh = self._indexfp(b"a+")
2520 ifh = self._indexfp(b"a+")
2521 self._writinghandles = (ifh, dfh)
2521 self._writinghandles = (ifh, dfh)
2522 finally:
2522 finally:
2523 self._writinghandles = None
2523 self._writinghandles = None
2524
2524
2525 if dfh:
2525 if dfh:
2526 dfh.close()
2526 dfh.close()
2527 ifh.close()
2527 ifh.close()
2528 return not empty
2528 return not empty
2529
2529
2530 def iscensored(self, rev):
2530 def iscensored(self, rev):
2531 """Check if a file revision is censored."""
2531 """Check if a file revision is censored."""
2532 if not self._censorable:
2532 if not self._censorable:
2533 return False
2533 return False
2534
2534
2535 return self.flags(rev) & REVIDX_ISCENSORED
2535 return self.flags(rev) & REVIDX_ISCENSORED
2536
2536
2537 def _peek_iscensored(self, baserev, delta, flush):
2537 def _peek_iscensored(self, baserev, delta, flush):
2538 """Quickly check if a delta produces a censored revision."""
2538 """Quickly check if a delta produces a censored revision."""
2539 if not self._censorable:
2539 if not self._censorable:
2540 return False
2540 return False
2541
2541
2542 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2542 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2543
2543
2544 def getstrippoint(self, minlink):
2544 def getstrippoint(self, minlink):
2545 """find the minimum rev that must be stripped to strip the linkrev
2545 """find the minimum rev that must be stripped to strip the linkrev
2546
2546
2547 Returns a tuple containing the minimum rev and a set of all revs that
2547 Returns a tuple containing the minimum rev and a set of all revs that
2548 have linkrevs that will be broken by this strip.
2548 have linkrevs that will be broken by this strip.
2549 """
2549 """
2550 return storageutil.resolvestripinfo(
2550 return storageutil.resolvestripinfo(
2551 minlink,
2551 minlink,
2552 len(self) - 1,
2552 len(self) - 1,
2553 self.headrevs(),
2553 self.headrevs(),
2554 self.linkrev,
2554 self.linkrev,
2555 self.parentrevs,
2555 self.parentrevs,
2556 )
2556 )
2557
2557
2558 def strip(self, minlink, transaction):
2558 def strip(self, minlink, transaction):
2559 """truncate the revlog on the first revision with a linkrev >= minlink
2559 """truncate the revlog on the first revision with a linkrev >= minlink
2560
2560
2561 This function is called when we're stripping revision minlink and
2561 This function is called when we're stripping revision minlink and
2562 its descendants from the repository.
2562 its descendants from the repository.
2563
2563
2564 We have to remove all revisions with linkrev >= minlink, because
2564 We have to remove all revisions with linkrev >= minlink, because
2565 the equivalent changelog revisions will be renumbered after the
2565 the equivalent changelog revisions will be renumbered after the
2566 strip.
2566 strip.
2567
2567
2568 So we truncate the revlog on the first of these revisions, and
2568 So we truncate the revlog on the first of these revisions, and
2569 trust that the caller has saved the revisions that shouldn't be
2569 trust that the caller has saved the revisions that shouldn't be
2570 removed and that it'll re-add them after this truncation.
2570 removed and that it'll re-add them after this truncation.
2571 """
2571 """
2572 if len(self) == 0:
2572 if len(self) == 0:
2573 return
2573 return
2574
2574
2575 rev, _ = self.getstrippoint(minlink)
2575 rev, _ = self.getstrippoint(minlink)
2576 if rev == len(self):
2576 if rev == len(self):
2577 return
2577 return
2578
2578
2579 # first truncate the files on disk
2579 # first truncate the files on disk
2580 end = self.start(rev)
2580 end = self.start(rev)
2581 if not self._inline:
2581 if not self._inline:
2582 transaction.add(self._datafile, end)
2582 transaction.add(self._datafile, end)
2583 end = rev * self.index.entry_size
2583 end = rev * self.index.entry_size
2584 else:
2584 else:
2585 end += rev * self.index.entry_size
2585 end += rev * self.index.entry_size
2586
2586
2587 transaction.add(self._indexfile, end)
2587 transaction.add(self._indexfile, end)
2588
2588
2589 # then reset internal state in memory to forget those revisions
2589 # then reset internal state in memory to forget those revisions
2590 self._revisioncache = None
2590 self._revisioncache = None
2591 self._chaininfocache = util.lrucachedict(500)
2591 self._chaininfocache = util.lrucachedict(500)
2592 self._chunkclear()
2592 self._chunkclear()
2593
2593
2594 del self.index[rev:-1]
2594 del self.index[rev:-1]
2595
2595
2596 def checksize(self):
2596 def checksize(self):
2597 """Check size of index and data files
2597 """Check size of index and data files
2598
2598
2599 return a (dd, di) tuple.
2599 return a (dd, di) tuple.
2600 - dd: extra bytes for the "data" file
2600 - dd: extra bytes for the "data" file
2601 - di: extra bytes for the "index" file
2601 - di: extra bytes for the "index" file
2602
2602
2603 A healthy revlog will return (0, 0).
2603 A healthy revlog will return (0, 0).
2604 """
2604 """
2605 expected = 0
2605 expected = 0
2606 if len(self):
2606 if len(self):
2607 expected = max(0, self.end(len(self) - 1))
2607 expected = max(0, self.end(len(self) - 1))
2608
2608
2609 try:
2609 try:
2610 with self._datafp() as f:
2610 with self._datafp() as f:
2611 f.seek(0, io.SEEK_END)
2611 f.seek(0, io.SEEK_END)
2612 actual = f.tell()
2612 actual = f.tell()
2613 dd = actual - expected
2613 dd = actual - expected
2614 except IOError as inst:
2614 except IOError as inst:
2615 if inst.errno != errno.ENOENT:
2615 if inst.errno != errno.ENOENT:
2616 raise
2616 raise
2617 dd = 0
2617 dd = 0
2618
2618
2619 try:
2619 try:
2620 f = self.opener(self._indexfile)
2620 f = self.opener(self._indexfile)
2621 f.seek(0, io.SEEK_END)
2621 f.seek(0, io.SEEK_END)
2622 actual = f.tell()
2622 actual = f.tell()
2623 f.close()
2623 f.close()
2624 s = self.index.entry_size
2624 s = self.index.entry_size
2625 i = max(0, actual // s)
2625 i = max(0, actual // s)
2626 di = actual - (i * s)
2626 di = actual - (i * s)
2627 if self._inline:
2627 if self._inline:
2628 databytes = 0
2628 databytes = 0
2629 for r in self:
2629 for r in self:
2630 databytes += max(0, self.length(r))
2630 databytes += max(0, self.length(r))
2631 dd = 0
2631 dd = 0
2632 di = actual - len(self) * s - databytes
2632 di = actual - len(self) * s - databytes
2633 except IOError as inst:
2633 except IOError as inst:
2634 if inst.errno != errno.ENOENT:
2634 if inst.errno != errno.ENOENT:
2635 raise
2635 raise
2636 di = 0
2636 di = 0
2637
2637
2638 return (dd, di)
2638 return (dd, di)
2639
2639
2640 def files(self):
2640 def files(self):
2641 res = [self._indexfile]
2641 res = [self._indexfile]
2642 if not self._inline:
2642 if not self._inline:
2643 res.append(self._datafile)
2643 res.append(self._datafile)
2644 return res
2644 return res
2645
2645
2646 def emitrevisions(
2646 def emitrevisions(
2647 self,
2647 self,
2648 nodes,
2648 nodes,
2649 nodesorder=None,
2649 nodesorder=None,
2650 revisiondata=False,
2650 revisiondata=False,
2651 assumehaveparentrevisions=False,
2651 assumehaveparentrevisions=False,
2652 deltamode=repository.CG_DELTAMODE_STD,
2652 deltamode=repository.CG_DELTAMODE_STD,
2653 sidedata_helpers=None,
2653 sidedata_helpers=None,
2654 ):
2654 ):
2655 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2655 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2656 raise error.ProgrammingError(
2656 raise error.ProgrammingError(
2657 b'unhandled value for nodesorder: %s' % nodesorder
2657 b'unhandled value for nodesorder: %s' % nodesorder
2658 )
2658 )
2659
2659
2660 if nodesorder is None and not self._generaldelta:
2660 if nodesorder is None and not self._generaldelta:
2661 nodesorder = b'storage'
2661 nodesorder = b'storage'
2662
2662
2663 if (
2663 if (
2664 not self._storedeltachains
2664 not self._storedeltachains
2665 and deltamode != repository.CG_DELTAMODE_PREV
2665 and deltamode != repository.CG_DELTAMODE_PREV
2666 ):
2666 ):
2667 deltamode = repository.CG_DELTAMODE_FULL
2667 deltamode = repository.CG_DELTAMODE_FULL
2668
2668
2669 return storageutil.emitrevisions(
2669 return storageutil.emitrevisions(
2670 self,
2670 self,
2671 nodes,
2671 nodes,
2672 nodesorder,
2672 nodesorder,
2673 revlogrevisiondelta,
2673 revlogrevisiondelta,
2674 deltaparentfn=self.deltaparent,
2674 deltaparentfn=self.deltaparent,
2675 candeltafn=self.candelta,
2675 candeltafn=self.candelta,
2676 rawsizefn=self.rawsize,
2676 rawsizefn=self.rawsize,
2677 revdifffn=self.revdiff,
2677 revdifffn=self.revdiff,
2678 flagsfn=self.flags,
2678 flagsfn=self.flags,
2679 deltamode=deltamode,
2679 deltamode=deltamode,
2680 revisiondata=revisiondata,
2680 revisiondata=revisiondata,
2681 assumehaveparentrevisions=assumehaveparentrevisions,
2681 assumehaveparentrevisions=assumehaveparentrevisions,
2682 sidedata_helpers=sidedata_helpers,
2682 sidedata_helpers=sidedata_helpers,
2683 )
2683 )
2684
2684
2685 DELTAREUSEALWAYS = b'always'
2685 DELTAREUSEALWAYS = b'always'
2686 DELTAREUSESAMEREVS = b'samerevs'
2686 DELTAREUSESAMEREVS = b'samerevs'
2687 DELTAREUSENEVER = b'never'
2687 DELTAREUSENEVER = b'never'
2688
2688
2689 DELTAREUSEFULLADD = b'fulladd'
2689 DELTAREUSEFULLADD = b'fulladd'
2690
2690
2691 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2691 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2692
2692
2693 def clone(
2693 def clone(
2694 self,
2694 self,
2695 tr,
2695 tr,
2696 destrevlog,
2696 destrevlog,
2697 addrevisioncb=None,
2697 addrevisioncb=None,
2698 deltareuse=DELTAREUSESAMEREVS,
2698 deltareuse=DELTAREUSESAMEREVS,
2699 forcedeltabothparents=None,
2699 forcedeltabothparents=None,
2700 sidedata_helpers=None,
2700 sidedata_helpers=None,
2701 ):
2701 ):
2702 """Copy this revlog to another, possibly with format changes.
2702 """Copy this revlog to another, possibly with format changes.
2703
2703
2704 The destination revlog will contain the same revisions and nodes.
2704 The destination revlog will contain the same revisions and nodes.
2705 However, it may not be bit-for-bit identical due to e.g. delta encoding
2705 However, it may not be bit-for-bit identical due to e.g. delta encoding
2706 differences.
2706 differences.
2707
2707
2708 The ``deltareuse`` argument control how deltas from the existing revlog
2708 The ``deltareuse`` argument control how deltas from the existing revlog
2709 are preserved in the destination revlog. The argument can have the
2709 are preserved in the destination revlog. The argument can have the
2710 following values:
2710 following values:
2711
2711
2712 DELTAREUSEALWAYS
2712 DELTAREUSEALWAYS
2713 Deltas will always be reused (if possible), even if the destination
2713 Deltas will always be reused (if possible), even if the destination
2714 revlog would not select the same revisions for the delta. This is the
2714 revlog would not select the same revisions for the delta. This is the
2715 fastest mode of operation.
2715 fastest mode of operation.
2716 DELTAREUSESAMEREVS
2716 DELTAREUSESAMEREVS
2717 Deltas will be reused if the destination revlog would pick the same
2717 Deltas will be reused if the destination revlog would pick the same
2718 revisions for the delta. This mode strikes a balance between speed
2718 revisions for the delta. This mode strikes a balance between speed
2719 and optimization.
2719 and optimization.
2720 DELTAREUSENEVER
2720 DELTAREUSENEVER
2721 Deltas will never be reused. This is the slowest mode of execution.
2721 Deltas will never be reused. This is the slowest mode of execution.
2722 This mode can be used to recompute deltas (e.g. if the diff/delta
2722 This mode can be used to recompute deltas (e.g. if the diff/delta
2723 algorithm changes).
2723 algorithm changes).
2724 DELTAREUSEFULLADD
2724 DELTAREUSEFULLADD
2725 Revision will be re-added as if their were new content. This is
2725 Revision will be re-added as if their were new content. This is
2726 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2726 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2727 eg: large file detection and handling.
2727 eg: large file detection and handling.
2728
2728
2729 Delta computation can be slow, so the choice of delta reuse policy can
2729 Delta computation can be slow, so the choice of delta reuse policy can
2730 significantly affect run time.
2730 significantly affect run time.
2731
2731
2732 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2732 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2733 two extremes. Deltas will be reused if they are appropriate. But if the
2733 two extremes. Deltas will be reused if they are appropriate. But if the
2734 delta could choose a better revision, it will do so. This means if you
2734 delta could choose a better revision, it will do so. This means if you
2735 are converting a non-generaldelta revlog to a generaldelta revlog,
2735 are converting a non-generaldelta revlog to a generaldelta revlog,
2736 deltas will be recomputed if the delta's parent isn't a parent of the
2736 deltas will be recomputed if the delta's parent isn't a parent of the
2737 revision.
2737 revision.
2738
2738
2739 In addition to the delta policy, the ``forcedeltabothparents``
2739 In addition to the delta policy, the ``forcedeltabothparents``
2740 argument controls whether to force compute deltas against both parents
2740 argument controls whether to force compute deltas against both parents
2741 for merges. By default, the current default is used.
2741 for merges. By default, the current default is used.
2742
2742
2743 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2743 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2744 `sidedata_helpers`.
2744 `sidedata_helpers`.
2745 """
2745 """
2746 if deltareuse not in self.DELTAREUSEALL:
2746 if deltareuse not in self.DELTAREUSEALL:
2747 raise ValueError(
2747 raise ValueError(
2748 _(b'value for deltareuse invalid: %s') % deltareuse
2748 _(b'value for deltareuse invalid: %s') % deltareuse
2749 )
2749 )
2750
2750
2751 if len(destrevlog):
2751 if len(destrevlog):
2752 raise ValueError(_(b'destination revlog is not empty'))
2752 raise ValueError(_(b'destination revlog is not empty'))
2753
2753
2754 if getattr(self, 'filteredrevs', None):
2754 if getattr(self, 'filteredrevs', None):
2755 raise ValueError(_(b'source revlog has filtered revisions'))
2755 raise ValueError(_(b'source revlog has filtered revisions'))
2756 if getattr(destrevlog, 'filteredrevs', None):
2756 if getattr(destrevlog, 'filteredrevs', None):
2757 raise ValueError(_(b'destination revlog has filtered revisions'))
2757 raise ValueError(_(b'destination revlog has filtered revisions'))
2758
2758
2759 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2759 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2760 # if possible.
2760 # if possible.
2761 oldlazydelta = destrevlog._lazydelta
2761 oldlazydelta = destrevlog._lazydelta
2762 oldlazydeltabase = destrevlog._lazydeltabase
2762 oldlazydeltabase = destrevlog._lazydeltabase
2763 oldamd = destrevlog._deltabothparents
2763 oldamd = destrevlog._deltabothparents
2764
2764
2765 try:
2765 try:
2766 if deltareuse == self.DELTAREUSEALWAYS:
2766 if deltareuse == self.DELTAREUSEALWAYS:
2767 destrevlog._lazydeltabase = True
2767 destrevlog._lazydeltabase = True
2768 destrevlog._lazydelta = True
2768 destrevlog._lazydelta = True
2769 elif deltareuse == self.DELTAREUSESAMEREVS:
2769 elif deltareuse == self.DELTAREUSESAMEREVS:
2770 destrevlog._lazydeltabase = False
2770 destrevlog._lazydeltabase = False
2771 destrevlog._lazydelta = True
2771 destrevlog._lazydelta = True
2772 elif deltareuse == self.DELTAREUSENEVER:
2772 elif deltareuse == self.DELTAREUSENEVER:
2773 destrevlog._lazydeltabase = False
2773 destrevlog._lazydeltabase = False
2774 destrevlog._lazydelta = False
2774 destrevlog._lazydelta = False
2775
2775
2776 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2776 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2777
2777
2778 self._clone(
2778 self._clone(
2779 tr,
2779 tr,
2780 destrevlog,
2780 destrevlog,
2781 addrevisioncb,
2781 addrevisioncb,
2782 deltareuse,
2782 deltareuse,
2783 forcedeltabothparents,
2783 forcedeltabothparents,
2784 sidedata_helpers,
2784 sidedata_helpers,
2785 )
2785 )
2786
2786
2787 finally:
2787 finally:
2788 destrevlog._lazydelta = oldlazydelta
2788 destrevlog._lazydelta = oldlazydelta
2789 destrevlog._lazydeltabase = oldlazydeltabase
2789 destrevlog._lazydeltabase = oldlazydeltabase
2790 destrevlog._deltabothparents = oldamd
2790 destrevlog._deltabothparents = oldamd
2791
2791
2792 def _clone(
2792 def _clone(
2793 self,
2793 self,
2794 tr,
2794 tr,
2795 destrevlog,
2795 destrevlog,
2796 addrevisioncb,
2796 addrevisioncb,
2797 deltareuse,
2797 deltareuse,
2798 forcedeltabothparents,
2798 forcedeltabothparents,
2799 sidedata_helpers,
2799 sidedata_helpers,
2800 ):
2800 ):
2801 """perform the core duty of `revlog.clone` after parameter processing"""
2801 """perform the core duty of `revlog.clone` after parameter processing"""
2802 deltacomputer = deltautil.deltacomputer(destrevlog)
2802 deltacomputer = deltautil.deltacomputer(destrevlog)
2803 index = self.index
2803 index = self.index
2804 for rev in self:
2804 for rev in self:
2805 entry = index[rev]
2805 entry = index[rev]
2806
2806
2807 # Some classes override linkrev to take filtered revs into
2807 # Some classes override linkrev to take filtered revs into
2808 # account. Use raw entry from index.
2808 # account. Use raw entry from index.
2809 flags = entry[0] & 0xFFFF
2809 flags = entry[0] & 0xFFFF
2810 linkrev = entry[4]
2810 linkrev = entry[4]
2811 p1 = index[entry[5]][7]
2811 p1 = index[entry[5]][7]
2812 p2 = index[entry[6]][7]
2812 p2 = index[entry[6]][7]
2813 node = entry[7]
2813 node = entry[7]
2814
2814
2815 # (Possibly) reuse the delta from the revlog if allowed and
2815 # (Possibly) reuse the delta from the revlog if allowed and
2816 # the revlog chunk is a delta.
2816 # the revlog chunk is a delta.
2817 cachedelta = None
2817 cachedelta = None
2818 rawtext = None
2818 rawtext = None
2819 if deltareuse == self.DELTAREUSEFULLADD:
2819 if deltareuse == self.DELTAREUSEFULLADD:
2820 text, sidedata = self._revisiondata(rev)
2820 text, sidedata = self._revisiondata(rev)
2821
2821
2822 if sidedata_helpers is not None:
2822 if sidedata_helpers is not None:
2823 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2823 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2824 self, sidedata_helpers, sidedata, rev
2824 self, sidedata_helpers, sidedata, rev
2825 )
2825 )
2826 flags = flags | new_flags[0] & ~new_flags[1]
2826 flags = flags | new_flags[0] & ~new_flags[1]
2827
2827
2828 destrevlog.addrevision(
2828 destrevlog.addrevision(
2829 text,
2829 text,
2830 tr,
2830 tr,
2831 linkrev,
2831 linkrev,
2832 p1,
2832 p1,
2833 p2,
2833 p2,
2834 cachedelta=cachedelta,
2834 cachedelta=cachedelta,
2835 node=node,
2835 node=node,
2836 flags=flags,
2836 flags=flags,
2837 deltacomputer=deltacomputer,
2837 deltacomputer=deltacomputer,
2838 sidedata=sidedata,
2838 sidedata=sidedata,
2839 )
2839 )
2840 else:
2840 else:
2841 if destrevlog._lazydelta:
2841 if destrevlog._lazydelta:
2842 dp = self.deltaparent(rev)
2842 dp = self.deltaparent(rev)
2843 if dp != nullrev:
2843 if dp != nullrev:
2844 cachedelta = (dp, bytes(self._chunk(rev)))
2844 cachedelta = (dp, bytes(self._chunk(rev)))
2845
2845
2846 sidedata = None
2846 sidedata = None
2847 if not cachedelta:
2847 if not cachedelta:
2848 rawtext, sidedata = self._revisiondata(rev)
2848 rawtext, sidedata = self._revisiondata(rev)
2849 if sidedata is None:
2849 if sidedata is None:
2850 sidedata = self.sidedata(rev)
2850 sidedata = self.sidedata(rev)
2851
2851
2852 if sidedata_helpers is not None:
2852 if sidedata_helpers is not None:
2853 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2853 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2854 self, sidedata_helpers, sidedata, rev
2854 self, sidedata_helpers, sidedata, rev
2855 )
2855 )
2856 flags = flags | new_flags[0] & ~new_flags[1]
2856 flags = flags | new_flags[0] & ~new_flags[1]
2857
2857
2858 ifh = destrevlog.opener(
2858 ifh = destrevlog.opener(
2859 destrevlog._indexfile, b'a+', checkambig=False
2859 destrevlog._indexfile, b'a+', checkambig=False
2860 )
2860 )
2861 dfh = None
2861 dfh = None
2862 if not destrevlog._inline:
2862 if not destrevlog._inline:
2863 dfh = destrevlog.opener(destrevlog._datafile, b'a+')
2863 dfh = destrevlog.opener(destrevlog._datafile, b'a+')
2864 try:
2864 try:
2865 destrevlog._addrevision(
2865 destrevlog._addrevision(
2866 node,
2866 node,
2867 rawtext,
2867 rawtext,
2868 tr,
2868 tr,
2869 linkrev,
2869 linkrev,
2870 p1,
2870 p1,
2871 p2,
2871 p2,
2872 flags,
2872 flags,
2873 cachedelta,
2873 cachedelta,
2874 ifh,
2874 ifh,
2875 dfh,
2875 dfh,
2876 deltacomputer=deltacomputer,
2876 deltacomputer=deltacomputer,
2877 sidedata=sidedata,
2877 sidedata=sidedata,
2878 )
2878 )
2879 finally:
2879 finally:
2880 if dfh:
2880 if dfh:
2881 dfh.close()
2881 dfh.close()
2882 ifh.close()
2882 ifh.close()
2883
2883
2884 if addrevisioncb:
2884 if addrevisioncb:
2885 addrevisioncb(self, rev, node)
2885 addrevisioncb(self, rev, node)
2886
2886
2887 def censorrevision(self, tr, censornode, tombstone=b''):
2887 def censorrevision(self, tr, censornode, tombstone=b''):
2888 if self._format_version == REVLOGV0:
2888 if self._format_version == REVLOGV0:
2889 raise error.RevlogError(
2889 raise error.RevlogError(
2890 _(b'cannot censor with version %d revlogs')
2890 _(b'cannot censor with version %d revlogs')
2891 % self._format_version
2891 % self._format_version
2892 )
2892 )
2893
2893
2894 censorrev = self.rev(censornode)
2894 censorrev = self.rev(censornode)
2895 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2895 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2896
2896
2897 if len(tombstone) > self.rawsize(censorrev):
2897 if len(tombstone) > self.rawsize(censorrev):
2898 raise error.Abort(
2898 raise error.Abort(
2899 _(b'censor tombstone must be no longer than censored data')
2899 _(b'censor tombstone must be no longer than censored data')
2900 )
2900 )
2901
2901
2902 # Rewriting the revlog in place is hard. Our strategy for censoring is
2902 # Rewriting the revlog in place is hard. Our strategy for censoring is
2903 # to create a new revlog, copy all revisions to it, then replace the
2903 # to create a new revlog, copy all revisions to it, then replace the
2904 # revlogs on transaction close.
2904 # revlogs on transaction close.
2905 #
2905 #
2906 # This is a bit dangerous. We could easily have a mismatch of state.
2906 # This is a bit dangerous. We could easily have a mismatch of state.
2907 newrl = revlog(
2907 newrl = revlog(
2908 self.opener,
2908 self.opener,
2909 target=self.target,
2909 target=self.target,
2910 radix=self.radix,
2910 radix=self.radix,
2911 postfix=b'tmpcensored',
2911 postfix=b'tmpcensored',
2912 censorable=True,
2912 censorable=True,
2913 )
2913 )
2914 newrl._format_version = self._format_version
2914 newrl._format_version = self._format_version
2915 newrl._format_flags = self._format_flags
2915 newrl._format_flags = self._format_flags
2916 newrl._generaldelta = self._generaldelta
2916 newrl._generaldelta = self._generaldelta
2917 newrl._parse_index = self._parse_index
2917 newrl._parse_index = self._parse_index
2918
2918
2919 for rev in self.revs():
2919 for rev in self.revs():
2920 node = self.node(rev)
2920 node = self.node(rev)
2921 p1, p2 = self.parents(node)
2921 p1, p2 = self.parents(node)
2922
2922
2923 if rev == censorrev:
2923 if rev == censorrev:
2924 newrl.addrawrevision(
2924 newrl.addrawrevision(
2925 tombstone,
2925 tombstone,
2926 tr,
2926 tr,
2927 self.linkrev(censorrev),
2927 self.linkrev(censorrev),
2928 p1,
2928 p1,
2929 p2,
2929 p2,
2930 censornode,
2930 censornode,
2931 REVIDX_ISCENSORED,
2931 REVIDX_ISCENSORED,
2932 )
2932 )
2933
2933
2934 if newrl.deltaparent(rev) != nullrev:
2934 if newrl.deltaparent(rev) != nullrev:
2935 raise error.Abort(
2935 raise error.Abort(
2936 _(
2936 _(
2937 b'censored revision stored as delta; '
2937 b'censored revision stored as delta; '
2938 b'cannot censor'
2938 b'cannot censor'
2939 ),
2939 ),
2940 hint=_(
2940 hint=_(
2941 b'censoring of revlogs is not '
2941 b'censoring of revlogs is not '
2942 b'fully implemented; please report '
2942 b'fully implemented; please report '
2943 b'this bug'
2943 b'this bug'
2944 ),
2944 ),
2945 )
2945 )
2946 continue
2946 continue
2947
2947
2948 if self.iscensored(rev):
2948 if self.iscensored(rev):
2949 if self.deltaparent(rev) != nullrev:
2949 if self.deltaparent(rev) != nullrev:
2950 raise error.Abort(
2950 raise error.Abort(
2951 _(
2951 _(
2952 b'cannot censor due to censored '
2952 b'cannot censor due to censored '
2953 b'revision having delta stored'
2953 b'revision having delta stored'
2954 )
2954 )
2955 )
2955 )
2956 rawtext = self._chunk(rev)
2956 rawtext = self._chunk(rev)
2957 else:
2957 else:
2958 rawtext = self.rawdata(rev)
2958 rawtext = self.rawdata(rev)
2959
2959
2960 newrl.addrawrevision(
2960 newrl.addrawrevision(
2961 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2961 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2962 )
2962 )
2963
2963
2964 tr.addbackup(self._indexfile, location=b'store')
2964 tr.addbackup(self._indexfile, location=b'store')
2965 if not self._inline:
2965 if not self._inline:
2966 tr.addbackup(self._datafile, location=b'store')
2966 tr.addbackup(self._datafile, location=b'store')
2967
2967
2968 self.opener.rename(newrl._indexfile, self._indexfile)
2968 self.opener.rename(newrl._indexfile, self._indexfile)
2969 if not self._inline:
2969 if not self._inline:
2970 self.opener.rename(newrl._datafile, self._datafile)
2970 self.opener.rename(newrl._datafile, self._datafile)
2971
2971
2972 self.clearcaches()
2972 self.clearcaches()
2973 self._loadindex()
2973 self._loadindex()
2974
2974
2975 def verifyintegrity(self, state):
2975 def verifyintegrity(self, state):
2976 """Verifies the integrity of the revlog.
2976 """Verifies the integrity of the revlog.
2977
2977
2978 Yields ``revlogproblem`` instances describing problems that are
2978 Yields ``revlogproblem`` instances describing problems that are
2979 found.
2979 found.
2980 """
2980 """
2981 dd, di = self.checksize()
2981 dd, di = self.checksize()
2982 if dd:
2982 if dd:
2983 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2983 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2984 if di:
2984 if di:
2985 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2985 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2986
2986
2987 version = self._format_version
2987 version = self._format_version
2988
2988
2989 # The verifier tells us what version revlog we should be.
2989 # The verifier tells us what version revlog we should be.
2990 if version != state[b'expectedversion']:
2990 if version != state[b'expectedversion']:
2991 yield revlogproblem(
2991 yield revlogproblem(
2992 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2992 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2993 % (self.display_id, version, state[b'expectedversion'])
2993 % (self.display_id, version, state[b'expectedversion'])
2994 )
2994 )
2995
2995
2996 state[b'skipread'] = set()
2996 state[b'skipread'] = set()
2997 state[b'safe_renamed'] = set()
2997 state[b'safe_renamed'] = set()
2998
2998
2999 for rev in self:
2999 for rev in self:
3000 node = self.node(rev)
3000 node = self.node(rev)
3001
3001
3002 # Verify contents. 4 cases to care about:
3002 # Verify contents. 4 cases to care about:
3003 #
3003 #
3004 # common: the most common case
3004 # common: the most common case
3005 # rename: with a rename
3005 # rename: with a rename
3006 # meta: file content starts with b'\1\n', the metadata
3006 # meta: file content starts with b'\1\n', the metadata
3007 # header defined in filelog.py, but without a rename
3007 # header defined in filelog.py, but without a rename
3008 # ext: content stored externally
3008 # ext: content stored externally
3009 #
3009 #
3010 # More formally, their differences are shown below:
3010 # More formally, their differences are shown below:
3011 #
3011 #
3012 # | common | rename | meta | ext
3012 # | common | rename | meta | ext
3013 # -------------------------------------------------------
3013 # -------------------------------------------------------
3014 # flags() | 0 | 0 | 0 | not 0
3014 # flags() | 0 | 0 | 0 | not 0
3015 # renamed() | False | True | False | ?
3015 # renamed() | False | True | False | ?
3016 # rawtext[0:2]=='\1\n'| False | True | True | ?
3016 # rawtext[0:2]=='\1\n'| False | True | True | ?
3017 #
3017 #
3018 # "rawtext" means the raw text stored in revlog data, which
3018 # "rawtext" means the raw text stored in revlog data, which
3019 # could be retrieved by "rawdata(rev)". "text"
3019 # could be retrieved by "rawdata(rev)". "text"
3020 # mentioned below is "revision(rev)".
3020 # mentioned below is "revision(rev)".
3021 #
3021 #
3022 # There are 3 different lengths stored physically:
3022 # There are 3 different lengths stored physically:
3023 # 1. L1: rawsize, stored in revlog index
3023 # 1. L1: rawsize, stored in revlog index
3024 # 2. L2: len(rawtext), stored in revlog data
3024 # 2. L2: len(rawtext), stored in revlog data
3025 # 3. L3: len(text), stored in revlog data if flags==0, or
3025 # 3. L3: len(text), stored in revlog data if flags==0, or
3026 # possibly somewhere else if flags!=0
3026 # possibly somewhere else if flags!=0
3027 #
3027 #
3028 # L1 should be equal to L2. L3 could be different from them.
3028 # L1 should be equal to L2. L3 could be different from them.
3029 # "text" may or may not affect commit hash depending on flag
3029 # "text" may or may not affect commit hash depending on flag
3030 # processors (see flagutil.addflagprocessor).
3030 # processors (see flagutil.addflagprocessor).
3031 #
3031 #
3032 # | common | rename | meta | ext
3032 # | common | rename | meta | ext
3033 # -------------------------------------------------
3033 # -------------------------------------------------
3034 # rawsize() | L1 | L1 | L1 | L1
3034 # rawsize() | L1 | L1 | L1 | L1
3035 # size() | L1 | L2-LM | L1(*) | L1 (?)
3035 # size() | L1 | L2-LM | L1(*) | L1 (?)
3036 # len(rawtext) | L2 | L2 | L2 | L2
3036 # len(rawtext) | L2 | L2 | L2 | L2
3037 # len(text) | L2 | L2 | L2 | L3
3037 # len(text) | L2 | L2 | L2 | L3
3038 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3038 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3039 #
3039 #
3040 # LM: length of metadata, depending on rawtext
3040 # LM: length of metadata, depending on rawtext
3041 # (*): not ideal, see comment in filelog.size
3041 # (*): not ideal, see comment in filelog.size
3042 # (?): could be "- len(meta)" if the resolved content has
3042 # (?): could be "- len(meta)" if the resolved content has
3043 # rename metadata
3043 # rename metadata
3044 #
3044 #
3045 # Checks needed to be done:
3045 # Checks needed to be done:
3046 # 1. length check: L1 == L2, in all cases.
3046 # 1. length check: L1 == L2, in all cases.
3047 # 2. hash check: depending on flag processor, we may need to
3047 # 2. hash check: depending on flag processor, we may need to
3048 # use either "text" (external), or "rawtext" (in revlog).
3048 # use either "text" (external), or "rawtext" (in revlog).
3049
3049
3050 try:
3050 try:
3051 skipflags = state.get(b'skipflags', 0)
3051 skipflags = state.get(b'skipflags', 0)
3052 if skipflags:
3052 if skipflags:
3053 skipflags &= self.flags(rev)
3053 skipflags &= self.flags(rev)
3054
3054
3055 _verify_revision(self, skipflags, state, node)
3055 _verify_revision(self, skipflags, state, node)
3056
3056
3057 l1 = self.rawsize(rev)
3057 l1 = self.rawsize(rev)
3058 l2 = len(self.rawdata(node))
3058 l2 = len(self.rawdata(node))
3059
3059
3060 if l1 != l2:
3060 if l1 != l2:
3061 yield revlogproblem(
3061 yield revlogproblem(
3062 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3062 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3063 node=node,
3063 node=node,
3064 )
3064 )
3065
3065
3066 except error.CensoredNodeError:
3066 except error.CensoredNodeError:
3067 if state[b'erroroncensored']:
3067 if state[b'erroroncensored']:
3068 yield revlogproblem(
3068 yield revlogproblem(
3069 error=_(b'censored file data'), node=node
3069 error=_(b'censored file data'), node=node
3070 )
3070 )
3071 state[b'skipread'].add(node)
3071 state[b'skipread'].add(node)
3072 except Exception as e:
3072 except Exception as e:
3073 yield revlogproblem(
3073 yield revlogproblem(
3074 error=_(b'unpacking %s: %s')
3074 error=_(b'unpacking %s: %s')
3075 % (short(node), stringutil.forcebytestr(e)),
3075 % (short(node), stringutil.forcebytestr(e)),
3076 node=node,
3076 node=node,
3077 )
3077 )
3078 state[b'skipread'].add(node)
3078 state[b'skipread'].add(node)
3079
3079
3080 def storageinfo(
3080 def storageinfo(
3081 self,
3081 self,
3082 exclusivefiles=False,
3082 exclusivefiles=False,
3083 sharedfiles=False,
3083 sharedfiles=False,
3084 revisionscount=False,
3084 revisionscount=False,
3085 trackedsize=False,
3085 trackedsize=False,
3086 storedsize=False,
3086 storedsize=False,
3087 ):
3087 ):
3088 d = {}
3088 d = {}
3089
3089
3090 if exclusivefiles:
3090 if exclusivefiles:
3091 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3091 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3092 if not self._inline:
3092 if not self._inline:
3093 d[b'exclusivefiles'].append((self.opener, self._datafile))
3093 d[b'exclusivefiles'].append((self.opener, self._datafile))
3094
3094
3095 if sharedfiles:
3095 if sharedfiles:
3096 d[b'sharedfiles'] = []
3096 d[b'sharedfiles'] = []
3097
3097
3098 if revisionscount:
3098 if revisionscount:
3099 d[b'revisionscount'] = len(self)
3099 d[b'revisionscount'] = len(self)
3100
3100
3101 if trackedsize:
3101 if trackedsize:
3102 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3102 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3103
3103
3104 if storedsize:
3104 if storedsize:
3105 d[b'storedsize'] = sum(
3105 d[b'storedsize'] = sum(
3106 self.opener.stat(path).st_size for path in self.files()
3106 self.opener.stat(path).st_size for path in self.files()
3107 )
3107 )
3108
3108
3109 return d
3109 return d
3110
3110
3111 def rewrite_sidedata(self, helpers, startrev, endrev):
3111 def rewrite_sidedata(self, helpers, startrev, endrev):
3112 if not self.hassidedata:
3112 if not self.hassidedata:
3113 return
3113 return
3114 # inline are not yet supported because they suffer from an issue when
3114 # inline are not yet supported because they suffer from an issue when
3115 # rewriting them (since it's not an append-only operation).
3115 # rewriting them (since it's not an append-only operation).
3116 # See issue6485.
3116 # See issue6485.
3117 assert not self._inline
3117 assert not self._inline
3118 if not helpers[1] and not helpers[2]:
3118 if not helpers[1] and not helpers[2]:
3119 # Nothing to generate or remove
3119 # Nothing to generate or remove
3120 return
3120 return
3121
3121
3122 # changelog implement some "delayed" writing mechanism that assume that
3122 # changelog implement some "delayed" writing mechanism that assume that
3123 # all index data is writen in append mode and is therefor incompatible
3123 # all index data is writen in append mode and is therefor incompatible
3124 # with the seeked write done in this method. The use of such "delayed"
3124 # with the seeked write done in this method. The use of such "delayed"
3125 # writing will soon be removed for revlog version that support side
3125 # writing will soon be removed for revlog version that support side
3126 # data, so for now, we only keep this simple assert to highlight the
3126 # data, so for now, we only keep this simple assert to highlight the
3127 # situation.
3127 # situation.
3128 delayed = getattr(self, '_delayed', False)
3128 delayed = getattr(self, '_delayed', False)
3129 diverted = getattr(self, '_divert', False)
3129 diverted = getattr(self, '_divert', False)
3130 if delayed and not diverted:
3130 if delayed and not diverted:
3131 msg = "cannot rewrite_sidedata of a delayed revlog"
3131 msg = "cannot rewrite_sidedata of a delayed revlog"
3132 raise error.ProgrammingError(msg)
3132 raise error.ProgrammingError(msg)
3133
3133
3134 new_entries = []
3134 new_entries = []
3135 # append the new sidedata
3135 # append the new sidedata
3136 with self._datafp(b'a+') as fp:
3136 with self._datafp(b'a+') as fp:
3137 # Maybe this bug still exists, see revlog._writeentry
3137 # Maybe this bug still exists, see revlog._writeentry
3138 fp.seek(0, os.SEEK_END)
3138 fp.seek(0, os.SEEK_END)
3139 current_offset = fp.tell()
3139 current_offset = fp.tell()
3140 for rev in range(startrev, endrev + 1):
3140 for rev in range(startrev, endrev + 1):
3141 entry = self.index[rev]
3141 entry = self.index[rev]
3142 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3142 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3143 store=self,
3143 store=self,
3144 sidedata_helpers=helpers,
3144 sidedata_helpers=helpers,
3145 sidedata={},
3145 sidedata={},
3146 rev=rev,
3146 rev=rev,
3147 )
3147 )
3148
3148
3149 serialized_sidedata = sidedatautil.serialize_sidedata(
3149 serialized_sidedata = sidedatautil.serialize_sidedata(
3150 new_sidedata
3150 new_sidedata
3151 )
3151 )
3152 if entry[8] != 0 or entry[9] != 0:
3152 if entry[8] != 0 or entry[9] != 0:
3153 # rewriting entries that already have sidedata is not
3153 # rewriting entries that already have sidedata is not
3154 # supported yet, because it introduces garbage data in the
3154 # supported yet, because it introduces garbage data in the
3155 # revlog.
3155 # revlog.
3156 msg = b"Rewriting existing sidedata is not supported yet"
3156 msg = b"Rewriting existing sidedata is not supported yet"
3157 raise error.Abort(msg)
3157 raise error.Abort(msg)
3158
3158
3159 # Apply (potential) flags to add and to remove after running
3159 # Apply (potential) flags to add and to remove after running
3160 # the sidedata helpers
3160 # the sidedata helpers
3161 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3161 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3162 entry = (new_offset_flags,) + entry[1:8]
3162 entry = (new_offset_flags,) + entry[1:8]
3163 entry += (current_offset, len(serialized_sidedata))
3163 entry += (current_offset, len(serialized_sidedata))
3164
3164
3165 fp.write(serialized_sidedata)
3165 fp.write(serialized_sidedata)
3166 new_entries.append(entry)
3166 new_entries.append(entry)
3167 current_offset += len(serialized_sidedata)
3167 current_offset += len(serialized_sidedata)
3168
3168
3169 # rewrite the new index entries
3169 # rewrite the new index entries
3170 with self._indexfp(b'r+') as fp:
3170 with self._indexfp(b'r+') as fp:
3171 fp.seek(startrev * self.index.entry_size)
3171 fp.seek(startrev * self.index.entry_size)
3172 for i, e in enumerate(new_entries):
3172 for i, e in enumerate(new_entries):
3173 rev = startrev + i
3173 rev = startrev + i
3174 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3174 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3175 packed = self.index.entry_binary(rev)
3175 packed = self.index.entry_binary(rev)
3176 if rev == 0:
3176 if rev == 0:
3177 header = self._format_flags | self._format_version
3177 header = self._format_flags | self._format_version
3178 header = self.index.pack_header(header)
3178 header = self.index.pack_header(header)
3179 packed = header + packed
3179 packed = header + packed
3180 fp.write(packed)
3180 fp.write(packed)
@@ -1,656 +1,656 b''
1 # nodemap.py - nodemap related code and utilities
1 # nodemap.py - nodemap related code and utilities
2 #
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import errno
11 import errno
12 import os
12 import os
13 import re
13 import re
14 import struct
14 import struct
15
15
16 from ..node import hex
16 from ..node import hex
17
17
18 from .. import (
18 from .. import (
19 error,
19 error,
20 util,
20 util,
21 )
21 )
22
22
23
23
24 class NodeMap(dict):
24 class NodeMap(dict):
25 def __missing__(self, x):
25 def __missing__(self, x):
26 raise error.RevlogError(b'unknown node: %s' % x)
26 raise error.RevlogError(b'unknown node: %s' % x)
27
27
28
28
29 def persisted_data(revlog):
29 def persisted_data(revlog):
30 """read the nodemap for a revlog from disk"""
30 """read the nodemap for a revlog from disk"""
31 if revlog.nodemap_file is None:
31 if revlog._nodemap_file is None:
32 return None
32 return None
33 pdata = revlog.opener.tryread(revlog.nodemap_file)
33 pdata = revlog.opener.tryread(revlog._nodemap_file)
34 if not pdata:
34 if not pdata:
35 return None
35 return None
36 offset = 0
36 offset = 0
37 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
37 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
38 if version != ONDISK_VERSION:
38 if version != ONDISK_VERSION:
39 return None
39 return None
40 offset += S_VERSION.size
40 offset += S_VERSION.size
41 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
41 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
42 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
42 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
43 offset += S_HEADER.size
43 offset += S_HEADER.size
44 docket = NodeMapDocket(pdata[offset : offset + uid_size])
44 docket = NodeMapDocket(pdata[offset : offset + uid_size])
45 offset += uid_size
45 offset += uid_size
46 docket.tip_rev = tip_rev
46 docket.tip_rev = tip_rev
47 docket.tip_node = pdata[offset : offset + tip_node_size]
47 docket.tip_node = pdata[offset : offset + tip_node_size]
48 docket.data_length = data_length
48 docket.data_length = data_length
49 docket.data_unused = data_unused
49 docket.data_unused = data_unused
50
50
51 filename = _rawdata_filepath(revlog, docket)
51 filename = _rawdata_filepath(revlog, docket)
52 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
52 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
53 try:
53 try:
54 with revlog.opener(filename) as fd:
54 with revlog.opener(filename) as fd:
55 if use_mmap:
55 if use_mmap:
56 try:
56 try:
57 data = util.buffer(util.mmapread(fd, data_length))
57 data = util.buffer(util.mmapread(fd, data_length))
58 except ValueError:
58 except ValueError:
59 # raised when the read file is too small
59 # raised when the read file is too small
60 data = b''
60 data = b''
61 else:
61 else:
62 data = fd.read(data_length)
62 data = fd.read(data_length)
63 except (IOError, OSError) as e:
63 except (IOError, OSError) as e:
64 if e.errno == errno.ENOENT:
64 if e.errno == errno.ENOENT:
65 return None
65 return None
66 else:
66 else:
67 raise
67 raise
68 if len(data) < data_length:
68 if len(data) < data_length:
69 return None
69 return None
70 return docket, data
70 return docket, data
71
71
72
72
73 def setup_persistent_nodemap(tr, revlog):
73 def setup_persistent_nodemap(tr, revlog):
74 """Install whatever is needed transaction side to persist a nodemap on disk
74 """Install whatever is needed transaction side to persist a nodemap on disk
75
75
76 (only actually persist the nodemap if this is relevant for this revlog)
76 (only actually persist the nodemap if this is relevant for this revlog)
77 """
77 """
78 if revlog._inline:
78 if revlog._inline:
79 return # inlined revlog are too small for this to be relevant
79 return # inlined revlog are too small for this to be relevant
80 if revlog.nodemap_file is None:
80 if revlog._nodemap_file is None:
81 return # we do not use persistent_nodemap on this revlog
81 return # we do not use persistent_nodemap on this revlog
82
82
83 # we need to happen after the changelog finalization, in that use "cl-"
83 # we need to happen after the changelog finalization, in that use "cl-"
84 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
84 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
85 if tr.hasfinalize(callback_id):
85 if tr.hasfinalize(callback_id):
86 return # no need to register again
86 return # no need to register again
87 tr.addpending(
87 tr.addpending(
88 callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
88 callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
89 )
89 )
90 tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))
90 tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))
91
91
92
92
93 class _NoTransaction(object):
93 class _NoTransaction(object):
94 """transaction like object to update the nodemap outside a transaction"""
94 """transaction like object to update the nodemap outside a transaction"""
95
95
96 def __init__(self):
96 def __init__(self):
97 self._postclose = {}
97 self._postclose = {}
98
98
99 def addpostclose(self, callback_id, callback_func):
99 def addpostclose(self, callback_id, callback_func):
100 self._postclose[callback_id] = callback_func
100 self._postclose[callback_id] = callback_func
101
101
102 def registertmp(self, *args, **kwargs):
102 def registertmp(self, *args, **kwargs):
103 pass
103 pass
104
104
105 def addbackup(self, *args, **kwargs):
105 def addbackup(self, *args, **kwargs):
106 pass
106 pass
107
107
108 def add(self, *args, **kwargs):
108 def add(self, *args, **kwargs):
109 pass
109 pass
110
110
111 def addabort(self, *args, **kwargs):
111 def addabort(self, *args, **kwargs):
112 pass
112 pass
113
113
114 def _report(self, *args):
114 def _report(self, *args):
115 pass
115 pass
116
116
117
117
118 def update_persistent_nodemap(revlog):
118 def update_persistent_nodemap(revlog):
119 """update the persistent nodemap right now
119 """update the persistent nodemap right now
120
120
121 To be used for updating the nodemap on disk outside of a normal transaction
121 To be used for updating the nodemap on disk outside of a normal transaction
122 setup (eg, `debugupdatecache`).
122 setup (eg, `debugupdatecache`).
123 """
123 """
124 if revlog._inline:
124 if revlog._inline:
125 return # inlined revlog are too small for this to be relevant
125 return # inlined revlog are too small for this to be relevant
126 if revlog.nodemap_file is None:
126 if revlog._nodemap_file is None:
127 return # we do not use persistent_nodemap on this revlog
127 return # we do not use persistent_nodemap on this revlog
128
128
129 notr = _NoTransaction()
129 notr = _NoTransaction()
130 persist_nodemap(notr, revlog)
130 persist_nodemap(notr, revlog)
131 for k in sorted(notr._postclose):
131 for k in sorted(notr._postclose):
132 notr._postclose[k](None)
132 notr._postclose[k](None)
133
133
134
134
135 def delete_nodemap(tr, repo, revlog):
135 def delete_nodemap(tr, repo, revlog):
136 """Delete nodemap data on disk for a given revlog"""
136 """ Delete nodemap data on disk for a given revlog"""
137 if revlog.nodemap_file is None:
137 if revlog._nodemap_file is None:
138 msg = "calling persist nodemap on a revlog without the feature enabled"
138 msg = "calling persist nodemap on a revlog without the feature enabled"
139 raise error.ProgrammingError(msg)
139 raise error.ProgrammingError(msg)
140 repo.svfs.unlink(revlog.nodemap_file)
140 repo.svfs.unlink(revlog._nodemap_file)
141
141
142
142
143 def persist_nodemap(tr, revlog, pending=False, force=False):
143 def persist_nodemap(tr, revlog, pending=False, force=False):
144 """Write nodemap data on disk for a given revlog"""
144 """Write nodemap data on disk for a given revlog"""
145 if getattr(revlog, 'filteredrevs', ()):
145 if getattr(revlog, 'filteredrevs', ()):
146 raise error.ProgrammingError(
146 raise error.ProgrammingError(
147 "cannot persist nodemap of a filtered changelog"
147 "cannot persist nodemap of a filtered changelog"
148 )
148 )
149 if revlog.nodemap_file is None:
149 if revlog._nodemap_file is None:
150 if force:
150 if force:
151 revlog.nodemap_file = get_nodemap_file(revlog)
151 revlog._nodemap_file = get_nodemap_file(revlog)
152 else:
152 else:
153 msg = "calling persist nodemap on a revlog without the feature enabled"
153 msg = "calling persist nodemap on a revlog without the feature enabled"
154 raise error.ProgrammingError(msg)
154 raise error.ProgrammingError(msg)
155
155
156 can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
156 can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
157 ondisk_docket = revlog._nodemap_docket
157 ondisk_docket = revlog._nodemap_docket
158 feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
158 feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
159 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
159 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
160
160
161 data = None
161 data = None
162 # first attemp an incremental update of the data
162 # first attemp an incremental update of the data
163 if can_incremental and ondisk_docket is not None:
163 if can_incremental and ondisk_docket is not None:
164 target_docket = revlog._nodemap_docket.copy()
164 target_docket = revlog._nodemap_docket.copy()
165 (
165 (
166 src_docket,
166 src_docket,
167 data_changed_count,
167 data_changed_count,
168 data,
168 data,
169 ) = revlog.index.nodemap_data_incremental()
169 ) = revlog.index.nodemap_data_incremental()
170 new_length = target_docket.data_length + len(data)
170 new_length = target_docket.data_length + len(data)
171 new_unused = target_docket.data_unused + data_changed_count
171 new_unused = target_docket.data_unused + data_changed_count
172 if src_docket != target_docket:
172 if src_docket != target_docket:
173 data = None
173 data = None
174 elif new_length <= (new_unused * 10): # under 10% of unused data
174 elif new_length <= (new_unused * 10): # under 10% of unused data
175 data = None
175 data = None
176 else:
176 else:
177 datafile = _rawdata_filepath(revlog, target_docket)
177 datafile = _rawdata_filepath(revlog, target_docket)
178 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
178 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
179 # store vfs
179 # store vfs
180 tr.add(datafile, target_docket.data_length)
180 tr.add(datafile, target_docket.data_length)
181 with revlog.opener(datafile, b'r+') as fd:
181 with revlog.opener(datafile, b'r+') as fd:
182 fd.seek(target_docket.data_length)
182 fd.seek(target_docket.data_length)
183 fd.write(data)
183 fd.write(data)
184 if feed_data:
184 if feed_data:
185 if use_mmap:
185 if use_mmap:
186 fd.seek(0)
186 fd.seek(0)
187 new_data = fd.read(new_length)
187 new_data = fd.read(new_length)
188 else:
188 else:
189 fd.flush()
189 fd.flush()
190 new_data = util.buffer(util.mmapread(fd, new_length))
190 new_data = util.buffer(util.mmapread(fd, new_length))
191 target_docket.data_length = new_length
191 target_docket.data_length = new_length
192 target_docket.data_unused = new_unused
192 target_docket.data_unused = new_unused
193
193
194 if data is None:
194 if data is None:
195 # otherwise fallback to a full new export
195 # otherwise fallback to a full new export
196 target_docket = NodeMapDocket()
196 target_docket = NodeMapDocket()
197 datafile = _rawdata_filepath(revlog, target_docket)
197 datafile = _rawdata_filepath(revlog, target_docket)
198 if util.safehasattr(revlog.index, "nodemap_data_all"):
198 if util.safehasattr(revlog.index, "nodemap_data_all"):
199 data = revlog.index.nodemap_data_all()
199 data = revlog.index.nodemap_data_all()
200 else:
200 else:
201 data = persistent_data(revlog.index)
201 data = persistent_data(revlog.index)
202 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
202 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
203 # store vfs
203 # store vfs
204
204
205 tryunlink = revlog.opener.tryunlink
205 tryunlink = revlog.opener.tryunlink
206
206
207 def abortck(tr):
207 def abortck(tr):
208 tryunlink(datafile)
208 tryunlink(datafile)
209
209
210 callback_id = b"delete-%s" % datafile
210 callback_id = b"delete-%s" % datafile
211
211
212 # some flavor of the transaction abort does not cleanup new file, it
212 # some flavor of the transaction abort does not cleanup new file, it
213 # simply empty them.
213 # simply empty them.
214 tr.addabort(callback_id, abortck)
214 tr.addabort(callback_id, abortck)
215 with revlog.opener(datafile, b'w+') as fd:
215 with revlog.opener(datafile, b'w+') as fd:
216 fd.write(data)
216 fd.write(data)
217 if feed_data:
217 if feed_data:
218 if use_mmap:
218 if use_mmap:
219 new_data = data
219 new_data = data
220 else:
220 else:
221 fd.flush()
221 fd.flush()
222 new_data = util.buffer(util.mmapread(fd, len(data)))
222 new_data = util.buffer(util.mmapread(fd, len(data)))
223 target_docket.data_length = len(data)
223 target_docket.data_length = len(data)
224 target_docket.tip_rev = revlog.tiprev()
224 target_docket.tip_rev = revlog.tiprev()
225 target_docket.tip_node = revlog.node(target_docket.tip_rev)
225 target_docket.tip_node = revlog.node(target_docket.tip_rev)
226 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
226 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
227 # store vfs
227 # store vfs
228 file_path = revlog.nodemap_file
228 file_path = revlog._nodemap_file
229 if pending:
229 if pending:
230 file_path += b'.a'
230 file_path += b'.a'
231 tr.registertmp(file_path)
231 tr.registertmp(file_path)
232 else:
232 else:
233 tr.addbackup(file_path)
233 tr.addbackup(file_path)
234
234
235 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
235 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
236 fp.write(target_docket.serialize())
236 fp.write(target_docket.serialize())
237 revlog._nodemap_docket = target_docket
237 revlog._nodemap_docket = target_docket
238 if feed_data:
238 if feed_data:
239 revlog.index.update_nodemap_data(target_docket, new_data)
239 revlog.index.update_nodemap_data(target_docket, new_data)
240
240
241 # search for old index file in all cases, some older process might have
241 # search for old index file in all cases, some older process might have
242 # left one behind.
242 # left one behind.
243 olds = _other_rawdata_filepath(revlog, target_docket)
243 olds = _other_rawdata_filepath(revlog, target_docket)
244 if olds:
244 if olds:
245 realvfs = getattr(revlog, '_realopener', revlog.opener)
245 realvfs = getattr(revlog, '_realopener', revlog.opener)
246
246
247 def cleanup(tr):
247 def cleanup(tr):
248 for oldfile in olds:
248 for oldfile in olds:
249 realvfs.tryunlink(oldfile)
249 realvfs.tryunlink(oldfile)
250
250
251 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
251 callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
252 tr.addpostclose(callback_id, cleanup)
252 tr.addpostclose(callback_id, cleanup)
253
253
254
254
255 ### Nodemap docket file
255 ### Nodemap docket file
256 #
256 #
257 # The nodemap data are stored on disk using 2 files:
257 # The nodemap data are stored on disk using 2 files:
258 #
258 #
259 # * a raw data files containing a persistent nodemap
259 # * a raw data files containing a persistent nodemap
260 # (see `Nodemap Trie` section)
260 # (see `Nodemap Trie` section)
261 #
261 #
262 # * a small "docket" file containing medatadata
262 # * a small "docket" file containing medatadata
263 #
263 #
264 # While the nodemap data can be multiple tens of megabytes, the "docket" is
264 # While the nodemap data can be multiple tens of megabytes, the "docket" is
265 # small, it is easy to update it automatically or to duplicated its content
265 # small, it is easy to update it automatically or to duplicated its content
266 # during a transaction.
266 # during a transaction.
267 #
267 #
268 # Multiple raw data can exist at the same time (The currently valid one and a
268 # Multiple raw data can exist at the same time (The currently valid one and a
269 # new one beind used by an in progress transaction). To accomodate this, the
269 # new one beind used by an in progress transaction). To accomodate this, the
270 # filename hosting the raw data has a variable parts. The exact filename is
270 # filename hosting the raw data has a variable parts. The exact filename is
271 # specified inside the "docket" file.
271 # specified inside the "docket" file.
272 #
272 #
273 # The docket file contains information to find, qualify and validate the raw
273 # The docket file contains information to find, qualify and validate the raw
274 # data. Its content is currently very light, but it will expand as the on disk
274 # data. Its content is currently very light, but it will expand as the on disk
275 # nodemap gains the necessary features to be used in production.
275 # nodemap gains the necessary features to be used in production.
276
276
277 ONDISK_VERSION = 1
277 ONDISK_VERSION = 1
278 S_VERSION = struct.Struct(">B")
278 S_VERSION = struct.Struct(">B")
279 S_HEADER = struct.Struct(">BQQQQ")
279 S_HEADER = struct.Struct(">BQQQQ")
280
280
281 ID_SIZE = 8
281 ID_SIZE = 8
282
282
283
283
284 def _make_uid():
284 def _make_uid():
285 """return a new unique identifier.
285 """return a new unique identifier.
286
286
287 The identifier is random and composed of ascii characters."""
287 The identifier is random and composed of ascii characters."""
288 return hex(os.urandom(ID_SIZE))
288 return hex(os.urandom(ID_SIZE))
289
289
290
290
291 class NodeMapDocket(object):
291 class NodeMapDocket(object):
292 """metadata associated with persistent nodemap data
292 """metadata associated with persistent nodemap data
293
293
294 The persistent data may come from disk or be on their way to disk.
294 The persistent data may come from disk or be on their way to disk.
295 """
295 """
296
296
297 def __init__(self, uid=None):
297 def __init__(self, uid=None):
298 if uid is None:
298 if uid is None:
299 uid = _make_uid()
299 uid = _make_uid()
300 # a unique identifier for the data file:
300 # a unique identifier for the data file:
301 # - When new data are appended, it is preserved.
301 # - When new data are appended, it is preserved.
302 # - When a new data file is created, a new identifier is generated.
302 # - When a new data file is created, a new identifier is generated.
303 self.uid = uid
303 self.uid = uid
304 # the tipmost revision stored in the data file. This revision and all
304 # the tipmost revision stored in the data file. This revision and all
305 # revision before it are expected to be encoded in the data file.
305 # revision before it are expected to be encoded in the data file.
306 self.tip_rev = None
306 self.tip_rev = None
307 # the node of that tipmost revision, if it mismatch the current index
307 # the node of that tipmost revision, if it mismatch the current index
308 # data the docket is not valid for the current index and should be
308 # data the docket is not valid for the current index and should be
309 # discarded.
309 # discarded.
310 #
310 #
311 # note: this method is not perfect as some destructive operation could
311 # note: this method is not perfect as some destructive operation could
312 # preserve the same tip_rev + tip_node while altering lower revision.
312 # preserve the same tip_rev + tip_node while altering lower revision.
313 # However this multiple other caches have the same vulnerability (eg:
313 # However this multiple other caches have the same vulnerability (eg:
314 # brancmap cache).
314 # brancmap cache).
315 self.tip_node = None
315 self.tip_node = None
316 # the size (in bytes) of the persisted data to encode the nodemap valid
316 # the size (in bytes) of the persisted data to encode the nodemap valid
317 # for `tip_rev`.
317 # for `tip_rev`.
318 # - data file shorter than this are corrupted,
318 # - data file shorter than this are corrupted,
319 # - any extra data should be ignored.
319 # - any extra data should be ignored.
320 self.data_length = None
320 self.data_length = None
321 # the amount (in bytes) of "dead" data, still in the data file but no
321 # the amount (in bytes) of "dead" data, still in the data file but no
322 # longer used for the nodemap.
322 # longer used for the nodemap.
323 self.data_unused = 0
323 self.data_unused = 0
324
324
325 def copy(self):
325 def copy(self):
326 new = NodeMapDocket(uid=self.uid)
326 new = NodeMapDocket(uid=self.uid)
327 new.tip_rev = self.tip_rev
327 new.tip_rev = self.tip_rev
328 new.tip_node = self.tip_node
328 new.tip_node = self.tip_node
329 new.data_length = self.data_length
329 new.data_length = self.data_length
330 new.data_unused = self.data_unused
330 new.data_unused = self.data_unused
331 return new
331 return new
332
332
333 def __cmp__(self, other):
333 def __cmp__(self, other):
334 if self.uid < other.uid:
334 if self.uid < other.uid:
335 return -1
335 return -1
336 if self.uid > other.uid:
336 if self.uid > other.uid:
337 return 1
337 return 1
338 elif self.data_length < other.data_length:
338 elif self.data_length < other.data_length:
339 return -1
339 return -1
340 elif self.data_length > other.data_length:
340 elif self.data_length > other.data_length:
341 return 1
341 return 1
342 return 0
342 return 0
343
343
344 def __eq__(self, other):
344 def __eq__(self, other):
345 return self.uid == other.uid and self.data_length == other.data_length
345 return self.uid == other.uid and self.data_length == other.data_length
346
346
347 def serialize(self):
347 def serialize(self):
348 """return serialized bytes for a docket using the passed uid"""
348 """return serialized bytes for a docket using the passed uid"""
349 data = []
349 data = []
350 data.append(S_VERSION.pack(ONDISK_VERSION))
350 data.append(S_VERSION.pack(ONDISK_VERSION))
351 headers = (
351 headers = (
352 len(self.uid),
352 len(self.uid),
353 self.tip_rev,
353 self.tip_rev,
354 self.data_length,
354 self.data_length,
355 self.data_unused,
355 self.data_unused,
356 len(self.tip_node),
356 len(self.tip_node),
357 )
357 )
358 data.append(S_HEADER.pack(*headers))
358 data.append(S_HEADER.pack(*headers))
359 data.append(self.uid)
359 data.append(self.uid)
360 data.append(self.tip_node)
360 data.append(self.tip_node)
361 return b''.join(data)
361 return b''.join(data)
362
362
363
363
364 def _rawdata_filepath(revlog, docket):
364 def _rawdata_filepath(revlog, docket):
365 """The (vfs relative) nodemap's rawdata file for a given uid"""
365 """The (vfs relative) nodemap's rawdata file for a given uid"""
366 prefix = revlog.radix
366 prefix = revlog.radix
367 return b"%s-%s.nd" % (prefix, docket.uid)
367 return b"%s-%s.nd" % (prefix, docket.uid)
368
368
369
369
370 def _other_rawdata_filepath(revlog, docket):
370 def _other_rawdata_filepath(revlog, docket):
371 prefix = revlog.radix
371 prefix = revlog.radix
372 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
372 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
373 new_file_path = _rawdata_filepath(revlog, docket)
373 new_file_path = _rawdata_filepath(revlog, docket)
374 new_file_name = revlog.opener.basename(new_file_path)
374 new_file_name = revlog.opener.basename(new_file_path)
375 dirpath = revlog.opener.dirname(new_file_path)
375 dirpath = revlog.opener.dirname(new_file_path)
376 others = []
376 others = []
377 for f in revlog.opener.listdir(dirpath):
377 for f in revlog.opener.listdir(dirpath):
378 if pattern.match(f) and f != new_file_name:
378 if pattern.match(f) and f != new_file_name:
379 others.append(f)
379 others.append(f)
380 return others
380 return others
381
381
382
382
383 ### Nodemap Trie
383 ### Nodemap Trie
384 #
384 #
385 # This is a simple reference implementation to compute and persist a nodemap
385 # This is a simple reference implementation to compute and persist a nodemap
386 # trie. This reference implementation is write only. The python version of this
386 # trie. This reference implementation is write only. The python version of this
387 # is not expected to be actually used, since it wont provide performance
387 # is not expected to be actually used, since it wont provide performance
388 # improvement over existing non-persistent C implementation.
388 # improvement over existing non-persistent C implementation.
389 #
389 #
390 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
390 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
391 # revision can be adressed using its node shortest prefix.
391 # revision can be adressed using its node shortest prefix.
392 #
392 #
393 # The trie is stored as a sequence of block. Each block contains 16 entries
393 # The trie is stored as a sequence of block. Each block contains 16 entries
394 # (signed 64bit integer, big endian). Each entry can be one of the following:
394 # (signed 64bit integer, big endian). Each entry can be one of the following:
395 #
395 #
396 # * value >= 0 -> index of sub-block
396 # * value >= 0 -> index of sub-block
397 # * value == -1 -> no value
397 # * value == -1 -> no value
398 # * value < -1 -> encoded revision: rev = -(value+2)
398 # * value < -1 -> encoded revision: rev = -(value+2)
399 #
399 #
400 # See REV_OFFSET and _transform_rev below.
400 # See REV_OFFSET and _transform_rev below.
401 #
401 #
402 # The implementation focus on simplicity, not on performance. A Rust
402 # The implementation focus on simplicity, not on performance. A Rust
403 # implementation should provide a efficient version of the same binary
403 # implementation should provide a efficient version of the same binary
404 # persistence. This reference python implementation is never meant to be
404 # persistence. This reference python implementation is never meant to be
405 # extensively use in production.
405 # extensively use in production.
406
406
407
407
408 def persistent_data(index):
408 def persistent_data(index):
409 """return the persistent binary form for a nodemap for a given index"""
409 """return the persistent binary form for a nodemap for a given index"""
410 trie = _build_trie(index)
410 trie = _build_trie(index)
411 return _persist_trie(trie)
411 return _persist_trie(trie)
412
412
413
413
414 def update_persistent_data(index, root, max_idx, last_rev):
414 def update_persistent_data(index, root, max_idx, last_rev):
415 """return the incremental update for persistent nodemap from a given index"""
415 """return the incremental update for persistent nodemap from a given index"""
416 changed_block, trie = _update_trie(index, root, last_rev)
416 changed_block, trie = _update_trie(index, root, last_rev)
417 return (
417 return (
418 changed_block * S_BLOCK.size,
418 changed_block * S_BLOCK.size,
419 _persist_trie(trie, existing_idx=max_idx),
419 _persist_trie(trie, existing_idx=max_idx),
420 )
420 )
421
421
422
422
423 S_BLOCK = struct.Struct(">" + ("l" * 16))
423 S_BLOCK = struct.Struct(">" + ("l" * 16))
424
424
425 NO_ENTRY = -1
425 NO_ENTRY = -1
426 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
426 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
427 REV_OFFSET = 2
427 REV_OFFSET = 2
428
428
429
429
430 def _transform_rev(rev):
430 def _transform_rev(rev):
431 """Return the number used to represent the rev in the tree.
431 """Return the number used to represent the rev in the tree.
432
432
433 (or retrieve a rev number from such representation)
433 (or retrieve a rev number from such representation)
434
434
435 Note that this is an involution, a function equal to its inverse (i.e.
435 Note that this is an involution, a function equal to its inverse (i.e.
436 which gives the identity when applied to itself).
436 which gives the identity when applied to itself).
437 """
437 """
438 return -(rev + REV_OFFSET)
438 return -(rev + REV_OFFSET)
439
439
440
440
441 def _to_int(hex_digit):
441 def _to_int(hex_digit):
442 """turn an hexadecimal digit into a proper integer"""
442 """turn an hexadecimal digit into a proper integer"""
443 return int(hex_digit, 16)
443 return int(hex_digit, 16)
444
444
445
445
446 class Block(dict):
446 class Block(dict):
447 """represent a block of the Trie
447 """represent a block of the Trie
448
448
449 contains up to 16 entry indexed from 0 to 15"""
449 contains up to 16 entry indexed from 0 to 15"""
450
450
451 def __init__(self):
451 def __init__(self):
452 super(Block, self).__init__()
452 super(Block, self).__init__()
453 # If this block exist on disk, here is its ID
453 # If this block exist on disk, here is its ID
454 self.ondisk_id = None
454 self.ondisk_id = None
455
455
456 def __iter__(self):
456 def __iter__(self):
457 return iter(self.get(i) for i in range(16))
457 return iter(self.get(i) for i in range(16))
458
458
459
459
460 def _build_trie(index):
460 def _build_trie(index):
461 """build a nodemap trie
461 """build a nodemap trie
462
462
463 The nodemap stores revision number for each unique prefix.
463 The nodemap stores revision number for each unique prefix.
464
464
465 Each block is a dictionary with keys in `[0, 15]`. Values are either
465 Each block is a dictionary with keys in `[0, 15]`. Values are either
466 another block or a revision number.
466 another block or a revision number.
467 """
467 """
468 root = Block()
468 root = Block()
469 for rev in range(len(index)):
469 for rev in range(len(index)):
470 current_hex = hex(index[rev][7])
470 current_hex = hex(index[rev][7])
471 _insert_into_block(index, 0, root, rev, current_hex)
471 _insert_into_block(index, 0, root, rev, current_hex)
472 return root
472 return root
473
473
474
474
475 def _update_trie(index, root, last_rev):
475 def _update_trie(index, root, last_rev):
476 """consume"""
476 """consume"""
477 changed = 0
477 changed = 0
478 for rev in range(last_rev + 1, len(index)):
478 for rev in range(last_rev + 1, len(index)):
479 current_hex = hex(index[rev][7])
479 current_hex = hex(index[rev][7])
480 changed += _insert_into_block(index, 0, root, rev, current_hex)
480 changed += _insert_into_block(index, 0, root, rev, current_hex)
481 return changed, root
481 return changed, root
482
482
483
483
484 def _insert_into_block(index, level, block, current_rev, current_hex):
484 def _insert_into_block(index, level, block, current_rev, current_hex):
485 """insert a new revision in a block
485 """insert a new revision in a block
486
486
487 index: the index we are adding revision for
487 index: the index we are adding revision for
488 level: the depth of the current block in the trie
488 level: the depth of the current block in the trie
489 block: the block currently being considered
489 block: the block currently being considered
490 current_rev: the revision number we are adding
490 current_rev: the revision number we are adding
491 current_hex: the hexadecimal representation of the of that revision
491 current_hex: the hexadecimal representation of the of that revision
492 """
492 """
493 changed = 1
493 changed = 1
494 if block.ondisk_id is not None:
494 if block.ondisk_id is not None:
495 block.ondisk_id = None
495 block.ondisk_id = None
496 hex_digit = _to_int(current_hex[level : level + 1])
496 hex_digit = _to_int(current_hex[level : level + 1])
497 entry = block.get(hex_digit)
497 entry = block.get(hex_digit)
498 if entry is None:
498 if entry is None:
499 # no entry, simply store the revision number
499 # no entry, simply store the revision number
500 block[hex_digit] = current_rev
500 block[hex_digit] = current_rev
501 elif isinstance(entry, dict):
501 elif isinstance(entry, dict):
502 # need to recurse to an underlying block
502 # need to recurse to an underlying block
503 changed += _insert_into_block(
503 changed += _insert_into_block(
504 index, level + 1, entry, current_rev, current_hex
504 index, level + 1, entry, current_rev, current_hex
505 )
505 )
506 else:
506 else:
507 # collision with a previously unique prefix, inserting new
507 # collision with a previously unique prefix, inserting new
508 # vertices to fit both entry.
508 # vertices to fit both entry.
509 other_hex = hex(index[entry][7])
509 other_hex = hex(index[entry][7])
510 other_rev = entry
510 other_rev = entry
511 new = Block()
511 new = Block()
512 block[hex_digit] = new
512 block[hex_digit] = new
513 _insert_into_block(index, level + 1, new, other_rev, other_hex)
513 _insert_into_block(index, level + 1, new, other_rev, other_hex)
514 _insert_into_block(index, level + 1, new, current_rev, current_hex)
514 _insert_into_block(index, level + 1, new, current_rev, current_hex)
515 return changed
515 return changed
516
516
517
517
518 def _persist_trie(root, existing_idx=None):
518 def _persist_trie(root, existing_idx=None):
519 """turn a nodemap trie into persistent binary data
519 """turn a nodemap trie into persistent binary data
520
520
521 See `_build_trie` for nodemap trie structure"""
521 See `_build_trie` for nodemap trie structure"""
522 block_map = {}
522 block_map = {}
523 if existing_idx is not None:
523 if existing_idx is not None:
524 base_idx = existing_idx + 1
524 base_idx = existing_idx + 1
525 else:
525 else:
526 base_idx = 0
526 base_idx = 0
527 chunks = []
527 chunks = []
528 for tn in _walk_trie(root):
528 for tn in _walk_trie(root):
529 if tn.ondisk_id is not None:
529 if tn.ondisk_id is not None:
530 block_map[id(tn)] = tn.ondisk_id
530 block_map[id(tn)] = tn.ondisk_id
531 else:
531 else:
532 block_map[id(tn)] = len(chunks) + base_idx
532 block_map[id(tn)] = len(chunks) + base_idx
533 chunks.append(_persist_block(tn, block_map))
533 chunks.append(_persist_block(tn, block_map))
534 return b''.join(chunks)
534 return b''.join(chunks)
535
535
536
536
537 def _walk_trie(block):
537 def _walk_trie(block):
538 """yield all the block in a trie
538 """yield all the block in a trie
539
539
540 Children blocks are always yield before their parent block.
540 Children blocks are always yield before their parent block.
541 """
541 """
542 for (__, item) in sorted(block.items()):
542 for (__, item) in sorted(block.items()):
543 if isinstance(item, dict):
543 if isinstance(item, dict):
544 for sub_block in _walk_trie(item):
544 for sub_block in _walk_trie(item):
545 yield sub_block
545 yield sub_block
546 yield block
546 yield block
547
547
548
548
549 def _persist_block(block_node, block_map):
549 def _persist_block(block_node, block_map):
550 """produce persistent binary data for a single block
550 """produce persistent binary data for a single block
551
551
552 Children block are assumed to be already persisted and present in
552 Children block are assumed to be already persisted and present in
553 block_map.
553 block_map.
554 """
554 """
555 data = tuple(_to_value(v, block_map) for v in block_node)
555 data = tuple(_to_value(v, block_map) for v in block_node)
556 return S_BLOCK.pack(*data)
556 return S_BLOCK.pack(*data)
557
557
558
558
559 def _to_value(item, block_map):
559 def _to_value(item, block_map):
560 """persist any value as an integer"""
560 """persist any value as an integer"""
561 if item is None:
561 if item is None:
562 return NO_ENTRY
562 return NO_ENTRY
563 elif isinstance(item, dict):
563 elif isinstance(item, dict):
564 return block_map[id(item)]
564 return block_map[id(item)]
565 else:
565 else:
566 return _transform_rev(item)
566 return _transform_rev(item)
567
567
568
568
569 def parse_data(data):
569 def parse_data(data):
570 """parse parse nodemap data into a nodemap Trie"""
570 """parse parse nodemap data into a nodemap Trie"""
571 if (len(data) % S_BLOCK.size) != 0:
571 if (len(data) % S_BLOCK.size) != 0:
572 msg = b"nodemap data size is not a multiple of block size (%d): %d"
572 msg = b"nodemap data size is not a multiple of block size (%d): %d"
573 raise error.Abort(msg % (S_BLOCK.size, len(data)))
573 raise error.Abort(msg % (S_BLOCK.size, len(data)))
574 if not data:
574 if not data:
575 return Block(), None
575 return Block(), None
576 block_map = {}
576 block_map = {}
577 new_blocks = []
577 new_blocks = []
578 for i in range(0, len(data), S_BLOCK.size):
578 for i in range(0, len(data), S_BLOCK.size):
579 block = Block()
579 block = Block()
580 block.ondisk_id = len(block_map)
580 block.ondisk_id = len(block_map)
581 block_map[block.ondisk_id] = block
581 block_map[block.ondisk_id] = block
582 block_data = data[i : i + S_BLOCK.size]
582 block_data = data[i : i + S_BLOCK.size]
583 values = S_BLOCK.unpack(block_data)
583 values = S_BLOCK.unpack(block_data)
584 new_blocks.append((block, values))
584 new_blocks.append((block, values))
585 for b, values in new_blocks:
585 for b, values in new_blocks:
586 for idx, v in enumerate(values):
586 for idx, v in enumerate(values):
587 if v == NO_ENTRY:
587 if v == NO_ENTRY:
588 continue
588 continue
589 elif v >= 0:
589 elif v >= 0:
590 b[idx] = block_map[v]
590 b[idx] = block_map[v]
591 else:
591 else:
592 b[idx] = _transform_rev(v)
592 b[idx] = _transform_rev(v)
593 return block, i // S_BLOCK.size
593 return block, i // S_BLOCK.size
594
594
595
595
596 # debug utility
596 # debug utility
597
597
598
598
599 def check_data(ui, index, data):
599 def check_data(ui, index, data):
600 """verify that the provided nodemap data are valid for the given idex"""
600 """verify that the provided nodemap data are valid for the given idex"""
601 ret = 0
601 ret = 0
602 ui.status((b"revision in index: %d\n") % len(index))
602 ui.status((b"revision in index: %d\n") % len(index))
603 root, __ = parse_data(data)
603 root, __ = parse_data(data)
604 all_revs = set(_all_revisions(root))
604 all_revs = set(_all_revisions(root))
605 ui.status((b"revision in nodemap: %d\n") % len(all_revs))
605 ui.status((b"revision in nodemap: %d\n") % len(all_revs))
606 for r in range(len(index)):
606 for r in range(len(index)):
607 if r not in all_revs:
607 if r not in all_revs:
608 msg = b" revision missing from nodemap: %d\n" % r
608 msg = b" revision missing from nodemap: %d\n" % r
609 ui.write_err(msg)
609 ui.write_err(msg)
610 ret = 1
610 ret = 1
611 else:
611 else:
612 all_revs.remove(r)
612 all_revs.remove(r)
613 nm_rev = _find_node(root, hex(index[r][7]))
613 nm_rev = _find_node(root, hex(index[r][7]))
614 if nm_rev is None:
614 if nm_rev is None:
615 msg = b" revision node does not match any entries: %d\n" % r
615 msg = b" revision node does not match any entries: %d\n" % r
616 ui.write_err(msg)
616 ui.write_err(msg)
617 ret = 1
617 ret = 1
618 elif nm_rev != r:
618 elif nm_rev != r:
619 msg = (
619 msg = (
620 b" revision node does not match the expected revision: "
620 b" revision node does not match the expected revision: "
621 b"%d != %d\n" % (r, nm_rev)
621 b"%d != %d\n" % (r, nm_rev)
622 )
622 )
623 ui.write_err(msg)
623 ui.write_err(msg)
624 ret = 1
624 ret = 1
625
625
626 if all_revs:
626 if all_revs:
627 for r in sorted(all_revs):
627 for r in sorted(all_revs):
628 msg = b" extra revision in nodemap: %d\n" % r
628 msg = b" extra revision in nodemap: %d\n" % r
629 ui.write_err(msg)
629 ui.write_err(msg)
630 ret = 1
630 ret = 1
631 return ret
631 return ret
632
632
633
633
634 def _all_revisions(root):
634 def _all_revisions(root):
635 """return all revisions stored in a Trie"""
635 """return all revisions stored in a Trie"""
636 for block in _walk_trie(root):
636 for block in _walk_trie(root):
637 for v in block:
637 for v in block:
638 if v is None or isinstance(v, Block):
638 if v is None or isinstance(v, Block):
639 continue
639 continue
640 yield v
640 yield v
641
641
642
642
643 def _find_node(block, node):
643 def _find_node(block, node):
644 """find the revision associated with a given node"""
644 """find the revision associated with a given node"""
645 entry = block.get(_to_int(node[0:1]))
645 entry = block.get(_to_int(node[0:1]))
646 if isinstance(entry, dict):
646 if isinstance(entry, dict):
647 return _find_node(entry, node[1:])
647 return _find_node(entry, node[1:])
648 return entry
648 return entry
649
649
650
650
651 def get_nodemap_file(revlog):
651 def get_nodemap_file(revlog):
652 if revlog.postfix == b'a':
652 if revlog.postfix == b'a':
653 pending_path = revlog.radix + b".n.a"
653 pending_path = revlog.radix + b".n.a"
654 if revlog.opener.exists(pending_path):
654 if revlog.opener.exists(pending_path):
655 return pending_path
655 return pending_path
656 return revlog.radix + b".n"
656 return revlog.radix + b".n"
General Comments 0
You need to be logged in to leave comments. Login now