revlog: stop using `_indexfile` to compute the nodemap path
marmoute
r47922:24be247a default
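The hunk below drops the dependency on the private `_indexfile` attribute when computing the persistent-nodemap path: `self.opener = opener` moves before the `persistentnodemap` check, and `nodemaputil.get_nodemap_file()` now receives the revlog object itself instead of an `(opener, indexfile)` pair, so the helper can build the path from the revlog's own state. A minimal sketch of the signature change, assuming the new helper derives the path from the revlog's `radix` (the `_before`/`_after` names and the exact path logic are illustrative; the real helper lives in mercurial/revlogutils/nodemap.py):

def get_nodemap_file_before(opener, indexfile):
    # old shape: string surgery on the index file name,
    # e.g. b'foo.i' -> b'foo.n'
    return indexfile[:-2] + b'.n'


def get_nodemap_file_after(revlog):
    # new shape: build the path from the revlog's radix directly,
    # so nothing outside the revlog needs to read _indexfile
    return b'%s.n' % revlog.radix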
@@ -1,3175 +1,3173 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
58 )
59 from .thirdparty import attr
59 from .thirdparty import attr
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 error,
63 error,
64 mdiff,
64 mdiff,
65 policy,
65 policy,
66 pycompat,
66 pycompat,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .interfaces import (
70 from .interfaces import (
71 repository,
71 repository,
72 util as interfaceutil,
72 util as interfaceutil,
73 )
73 )
74 from .revlogutils import (
74 from .revlogutils import (
75 deltas as deltautil,
75 deltas as deltautil,
76 flagutil,
76 flagutil,
77 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
86 # blanked usage of all the name to prevent pyflakes constraints
86 # blanked usage of all the name to prevent pyflakes constraints
87 # We need these name available in the module for extensions.
87 # We need these name available in the module for extensions.
88
88
89 REVLOGV0
89 REVLOGV0
90 REVLOGV1
90 REVLOGV1
91 REVLOGV2
91 REVLOGV2
92 FLAG_INLINE_DATA
92 FLAG_INLINE_DATA
93 FLAG_GENERALDELTA
93 FLAG_GENERALDELTA
94 REVLOG_DEFAULT_FLAGS
94 REVLOG_DEFAULT_FLAGS
95 REVLOG_DEFAULT_FORMAT
95 REVLOG_DEFAULT_FORMAT
96 REVLOG_DEFAULT_VERSION
96 REVLOG_DEFAULT_VERSION
97 REVLOGV1_FLAGS
97 REVLOGV1_FLAGS
98 REVLOGV2_FLAGS
98 REVLOGV2_FLAGS
99 REVIDX_ISCENSORED
99 REVIDX_ISCENSORED
100 REVIDX_ELLIPSIS
100 REVIDX_ELLIPSIS
101 REVIDX_HASCOPIESINFO
101 REVIDX_HASCOPIESINFO
102 REVIDX_EXTSTORED
102 REVIDX_EXTSTORED
103 REVIDX_DEFAULT_FLAGS
103 REVIDX_DEFAULT_FLAGS
104 REVIDX_FLAGS_ORDER
104 REVIDX_FLAGS_ORDER
105 REVIDX_RAWTEXT_CHANGING_FLAGS
105 REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
107 parsers = policy.importmod('parsers')
107 parsers = policy.importmod('parsers')
108 rustancestor = policy.importrust('ancestor')
108 rustancestor = policy.importrust('ancestor')
109 rustdagop = policy.importrust('dagop')
109 rustdagop = policy.importrust('dagop')
110 rustrevlog = policy.importrust('revlog')
110 rustrevlog = policy.importrust('revlog')
111
111
112 # Aliased for performance.
112 # Aliased for performance.
113 _zlibdecompress = zlib.decompress
113 _zlibdecompress = zlib.decompress
114
114
115 # max size of revlog with inline data
115 # max size of revlog with inline data
116 _maxinline = 131072
116 _maxinline = 131072
117 _chunksize = 1048576
117 _chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
120 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
121 return text, False
121 return text, False
122
122
123
123
124 def ellipsiswriteprocessor(rl, text):
124 def ellipsiswriteprocessor(rl, text):
125 return text, False
125 return text, False
126
126
127
127
128 def ellipsisrawprocessor(rl, text):
128 def ellipsisrawprocessor(rl, text):
129 return False
129 return False
130
130
131
131
132 ellipsisprocessor = (
132 ellipsisprocessor = (
133 ellipsisreadprocessor,
133 ellipsisreadprocessor,
134 ellipsiswriteprocessor,
134 ellipsiswriteprocessor,
135 ellipsisrawprocessor,
135 ellipsisrawprocessor,
136 )
136 )
137
137
138
138
139 def offset_type(offset, type):
139 def offset_type(offset, type):
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 raise ValueError(b'unknown revlog index flags')
141 raise ValueError(b'unknown revlog index flags')
142 return int(int(offset) << 16 | type)
142 return int(int(offset) << 16 | type)
143
143
144
144
145 def _verify_revision(rl, skipflags, state, node):
145 def _verify_revision(rl, skipflags, state, node):
146 """Verify the integrity of the given revlog ``node`` while providing a hook
146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 point for extensions to influence the operation."""
147 point for extensions to influence the operation."""
148 if skipflags:
148 if skipflags:
149 state[b'skipread'].add(node)
149 state[b'skipread'].add(node)
150 else:
150 else:
151 # Side-effect: read content and verify hash.
151 # Side-effect: read content and verify hash.
152 rl.revision(node)
152 rl.revision(node)
153
153
154
154
155 # True if a fast implementation for persistent-nodemap is available
155 # True if a fast implementation for persistent-nodemap is available
156 #
156 #
157 # We also consider we have a "fast" implementation in "pure" python because
157 # We also consider we have a "fast" implementation in "pure" python because
158 # people using pure don't really have performance consideration (and a
158 # people using pure don't really have performance consideration (and a
159 # wheelbarrow of other slowness source)
159 # wheelbarrow of other slowness source)
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 parsers, 'BaseIndexObject'
161 parsers, 'BaseIndexObject'
162 )
162 )
163
163
164
164
165 @attr.s(slots=True, frozen=True)
165 @attr.s(slots=True, frozen=True)
166 class _revisioninfo(object):
166 class _revisioninfo(object):
167 """Information about a revision that allows building its fulltext
167 """Information about a revision that allows building its fulltext
168 node: expected hash of the revision
168 node: expected hash of the revision
169 p1, p2: parent revs of the revision
169 p1, p2: parent revs of the revision
170 btext: built text cache consisting of a one-element list
170 btext: built text cache consisting of a one-element list
171 cachedelta: (baserev, uncompressed_delta) or None
171 cachedelta: (baserev, uncompressed_delta) or None
172 flags: flags associated to the revision storage
172 flags: flags associated to the revision storage
173
173
174 One of btext[0] or cachedelta must be set.
174 One of btext[0] or cachedelta must be set.
175 """
175 """
176
176
177 node = attr.ib()
177 node = attr.ib()
178 p1 = attr.ib()
178 p1 = attr.ib()
179 p2 = attr.ib()
179 p2 = attr.ib()
180 btext = attr.ib()
180 btext = attr.ib()
181 textlen = attr.ib()
181 textlen = attr.ib()
182 cachedelta = attr.ib()
182 cachedelta = attr.ib()
183 flags = attr.ib()
183 flags = attr.ib()
184
184
185
185
186 @interfaceutil.implementer(repository.irevisiondelta)
186 @interfaceutil.implementer(repository.irevisiondelta)
187 @attr.s(slots=True)
187 @attr.s(slots=True)
188 class revlogrevisiondelta(object):
188 class revlogrevisiondelta(object):
189 node = attr.ib()
189 node = attr.ib()
190 p1node = attr.ib()
190 p1node = attr.ib()
191 p2node = attr.ib()
191 p2node = attr.ib()
192 basenode = attr.ib()
192 basenode = attr.ib()
193 flags = attr.ib()
193 flags = attr.ib()
194 baserevisionsize = attr.ib()
194 baserevisionsize = attr.ib()
195 revision = attr.ib()
195 revision = attr.ib()
196 delta = attr.ib()
196 delta = attr.ib()
197 sidedata = attr.ib()
197 sidedata = attr.ib()
198 protocol_flags = attr.ib()
198 protocol_flags = attr.ib()
199 linknode = attr.ib(default=None)
199 linknode = attr.ib(default=None)
200
200
201
201
202 @interfaceutil.implementer(repository.iverifyproblem)
202 @interfaceutil.implementer(repository.iverifyproblem)
203 @attr.s(frozen=True)
203 @attr.s(frozen=True)
204 class revlogproblem(object):
204 class revlogproblem(object):
205 warning = attr.ib(default=None)
205 warning = attr.ib(default=None)
206 error = attr.ib(default=None)
206 error = attr.ib(default=None)
207 node = attr.ib(default=None)
207 node = attr.ib(default=None)
208
208
209
209
210 def parse_index_v1(data, inline):
210 def parse_index_v1(data, inline):
211 # call the C implementation to parse the index data
211 # call the C implementation to parse the index data
212 index, cache = parsers.parse_index2(data, inline)
212 index, cache = parsers.parse_index2(data, inline)
213 return index, cache
213 return index, cache
214
214
215
215
216 def parse_index_v2(data, inline):
216 def parse_index_v2(data, inline):
217 # call the C implementation to parse the index data
217 # call the C implementation to parse the index data
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 return index, cache
219 return index, cache
220
220
221
221
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223
223
224 def parse_index_v1_nodemap(data, inline):
224 def parse_index_v1_nodemap(data, inline):
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 return index, cache
226 return index, cache
227
227
228
228
229 else:
229 else:
230 parse_index_v1_nodemap = None
230 parse_index_v1_nodemap = None
231
231
232
232
233 def parse_index_v1_mixed(data, inline):
233 def parse_index_v1_mixed(data, inline):
234 index, cache = parse_index_v1(data, inline)
234 index, cache = parse_index_v1(data, inline)
235 return rustrevlog.MixedIndex(index), cache
235 return rustrevlog.MixedIndex(index), cache
236
236
237
237
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 # signed integer)
239 # signed integer)
240 _maxentrysize = 0x7FFFFFFF
240 _maxentrysize = 0x7FFFFFFF
241
241
242
242
243 class revlog(object):
243 class revlog(object):
244 """
244 """
245 the underlying revision storage object
245 the underlying revision storage object
246
246
247 A revlog consists of two parts, an index and the revision data.
247 A revlog consists of two parts, an index and the revision data.
248
248
249 The index is a file with a fixed record size containing
249 The index is a file with a fixed record size containing
250 information on each revision, including its nodeid (hash), the
250 information on each revision, including its nodeid (hash), the
251 nodeids of its parents, the position and offset of its data within
251 nodeids of its parents, the position and offset of its data within
252 the data file, and the revision it's based on. Finally, each entry
252 the data file, and the revision it's based on. Finally, each entry
253 contains a linkrev entry that can serve as a pointer to external
253 contains a linkrev entry that can serve as a pointer to external
254 data.
254 data.
255
255
256 The revision data itself is a linear collection of data chunks.
256 The revision data itself is a linear collection of data chunks.
257 Each chunk represents a revision and is usually represented as a
257 Each chunk represents a revision and is usually represented as a
258 delta against the previous chunk. To bound lookup time, runs of
258 delta against the previous chunk. To bound lookup time, runs of
259 deltas are limited to about 2 times the length of the original
259 deltas are limited to about 2 times the length of the original
260 version data. This makes retrieval of a version proportional to
260 version data. This makes retrieval of a version proportional to
261 its size, or O(1) relative to the number of revisions.
261 its size, or O(1) relative to the number of revisions.
262
262
263 Both pieces of the revlog are written to in an append-only
263 Both pieces of the revlog are written to in an append-only
264 fashion, which means we never need to rewrite a file to insert or
264 fashion, which means we never need to rewrite a file to insert or
265 remove data, and can use some simple techniques to avoid the need
265 remove data, and can use some simple techniques to avoid the need
266 for locking while reading.
266 for locking while reading.
267
267
268 If checkambig, indexfile is opened with checkambig=True at
268 If checkambig, indexfile is opened with checkambig=True at
269 writing, to avoid file stat ambiguity.
269 writing, to avoid file stat ambiguity.
270
270
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 index will be mmapped rather than read if it is larger than the
272 index will be mmapped rather than read if it is larger than the
273 configured threshold.
273 configured threshold.
274
274
275 If censorable is True, the revlog can have censored revisions.
275 If censorable is True, the revlog can have censored revisions.
276
276
277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 compression for the data content.
278 compression for the data content.
279
279
280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 file handle, a filename, and an expected position. It should check whether
281 file handle, a filename, and an expected position. It should check whether
282 the current position in the file handle is valid, and log/warn/fail (by
282 the current position in the file handle is valid, and log/warn/fail (by
283 raising).
283 raising).
284 """
284 """
285
285
286 _flagserrorclass = error.RevlogError
286 _flagserrorclass = error.RevlogError
287
287
288 def __init__(
288 def __init__(
289 self,
289 self,
290 opener,
290 opener,
291 target,
291 target,
292 radix,
292 radix,
293 postfix=None,
293 postfix=None,
294 checkambig=False,
294 checkambig=False,
295 mmaplargeindex=False,
295 mmaplargeindex=False,
296 censorable=False,
296 censorable=False,
297 upperboundcomp=None,
297 upperboundcomp=None,
298 persistentnodemap=False,
298 persistentnodemap=False,
299 concurrencychecker=None,
299 concurrencychecker=None,
300 ):
300 ):
301 """
301 """
302 create a revlog object
302 create a revlog object
303
303
304 opener is a function that abstracts the file opening operation
304 opener is a function that abstracts the file opening operation
305 and can be used to implement COW semantics or the like.
305 and can be used to implement COW semantics or the like.
306
306
307 `target`: a (KIND, ID) tuple that identify the content stored in
307 `target`: a (KIND, ID) tuple that identify the content stored in
308 this revlog. It help the rest of the code to understand what the revlog
308 this revlog. It help the rest of the code to understand what the revlog
309 is about without having to resort to heuristic and index filename
309 is about without having to resort to heuristic and index filename
310 analysis. Note: that this must be reliably be set by normal code, but
310 analysis. Note: that this must be reliably be set by normal code, but
311 that test, debug, or performance measurement code might not set this to
311 that test, debug, or performance measurement code might not set this to
312 accurate value.
312 accurate value.
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315
315
316 self.radix = radix
316 self.radix = radix
317
317
318 if postfix is None:
318 if postfix is None:
319 indexfile = b'%s.i' % self.radix
319 indexfile = b'%s.i' % self.radix
320 datafile = b'%s.d' % self.radix
320 datafile = b'%s.d' % self.radix
321 elif postfix == b'a':
321 elif postfix == b'a':
322 indexfile = b'%s.i.a' % self.radix
322 indexfile = b'%s.i.a' % self.radix
323 datafile = b'%s.d' % self.radix
323 datafile = b'%s.d' % self.radix
324 else:
324 else:
325 indexfile = b'%s.i.%s' % (self.radix, postfix)
325 indexfile = b'%s.i.%s' % (self.radix, postfix)
326 datafile = b'%s.d.%s' % (self.radix, postfix)
326 datafile = b'%s.d.%s' % (self.radix, postfix)
327
327
328 self._indexfile = indexfile
328 self._indexfile = indexfile
329 self._datafile = datafile
329 self._datafile = datafile
330 self.nodemap_file = None
330 self.nodemap_file = None
331 self.postfix = postfix
331 self.postfix = postfix
332 self.opener = opener
332 if persistentnodemap:
333 if persistentnodemap:
333 self.nodemap_file = nodemaputil.get_nodemap_file(
334 self.nodemap_file = nodemaputil.get_nodemap_file(self)
334 opener, self._indexfile
335
335 )
336
337 self.opener = opener
338 assert target[0] in ALL_KINDS
336 assert target[0] in ALL_KINDS
339 assert len(target) == 2
337 assert len(target) == 2
340 self.target = target
338 self.target = target
341 # When True, indexfile is opened with checkambig=True at writing, to
339 # When True, indexfile is opened with checkambig=True at writing, to
342 # avoid file stat ambiguity.
340 # avoid file stat ambiguity.
343 self._checkambig = checkambig
341 self._checkambig = checkambig
344 self._mmaplargeindex = mmaplargeindex
342 self._mmaplargeindex = mmaplargeindex
345 self._censorable = censorable
343 self._censorable = censorable
346 # 3-tuple of (node, rev, text) for a raw revision.
344 # 3-tuple of (node, rev, text) for a raw revision.
347 self._revisioncache = None
345 self._revisioncache = None
348 # Maps rev to chain base rev.
346 # Maps rev to chain base rev.
349 self._chainbasecache = util.lrucachedict(100)
347 self._chainbasecache = util.lrucachedict(100)
350 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
351 self._chunkcache = (0, b'')
349 self._chunkcache = (0, b'')
352 # How much data to read and cache into the raw revlog data cache.
350 # How much data to read and cache into the raw revlog data cache.
353 self._chunkcachesize = 65536
351 self._chunkcachesize = 65536
354 self._maxchainlen = None
352 self._maxchainlen = None
355 self._deltabothparents = True
353 self._deltabothparents = True
356 self.index = None
354 self.index = None
357 self._nodemap_docket = None
355 self._nodemap_docket = None
358 # Mapping of partial identifiers to full nodes.
356 # Mapping of partial identifiers to full nodes.
359 self._pcache = {}
357 self._pcache = {}
360 # Mapping of revision integer to full node.
358 # Mapping of revision integer to full node.
361 self._compengine = b'zlib'
359 self._compengine = b'zlib'
362 self._compengineopts = {}
360 self._compengineopts = {}
363 self._maxdeltachainspan = -1
361 self._maxdeltachainspan = -1
364 self._withsparseread = False
362 self._withsparseread = False
365 self._sparserevlog = False
363 self._sparserevlog = False
366 self._srdensitythreshold = 0.50
364 self._srdensitythreshold = 0.50
367 self._srmingapsize = 262144
365 self._srmingapsize = 262144
368
366
369 # Make copy of flag processors so each revlog instance can support
367 # Make copy of flag processors so each revlog instance can support
370 # custom flags.
368 # custom flags.
371 self._flagprocessors = dict(flagutil.flagprocessors)
369 self._flagprocessors = dict(flagutil.flagprocessors)
372
370
373 # 2-tuple of file handles being used for active writing.
371 # 2-tuple of file handles being used for active writing.
374 self._writinghandles = None
372 self._writinghandles = None
375
373
376 self._loadindex()
374 self._loadindex()
377
375
378 self._concurrencychecker = concurrencychecker
376 self._concurrencychecker = concurrencychecker
379
377
380 def _init_opts(self):
378 def _init_opts(self):
381 """process options (from above/config) to setup associated default revlog mode
379 """process options (from above/config) to setup associated default revlog mode
382
380
383 These values might be affected when actually reading on disk information.
381 These values might be affected when actually reading on disk information.
384
382
385 The relevant values are returned for use in _loadindex().
383 The relevant values are returned for use in _loadindex().
386
384
387 * newversionflags:
385 * newversionflags:
388 version header to use if we need to create a new revlog
386 version header to use if we need to create a new revlog
389
387
390 * mmapindexthreshold:
388 * mmapindexthreshold:
391 minimal index size for start to use mmap
389 minimal index size for start to use mmap
392
390
393 * force_nodemap:
391 * force_nodemap:
394 force the usage of a "development" version of the nodemap code
392 force the usage of a "development" version of the nodemap code
395 """
393 """
396 mmapindexthreshold = None
394 mmapindexthreshold = None
397 opts = self.opener.options
395 opts = self.opener.options
398
396
399 if b'revlogv2' in opts:
397 if b'revlogv2' in opts:
400 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
401 elif b'revlogv1' in opts:
399 elif b'revlogv1' in opts:
402 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
403 if b'generaldelta' in opts:
401 if b'generaldelta' in opts:
404 newversionflags |= FLAG_GENERALDELTA
402 newversionflags |= FLAG_GENERALDELTA
405 elif b'revlogv0' in self.opener.options:
403 elif b'revlogv0' in self.opener.options:
406 newversionflags = REVLOGV0
404 newversionflags = REVLOGV0
407 else:
405 else:
408 newversionflags = REVLOG_DEFAULT_VERSION
406 newversionflags = REVLOG_DEFAULT_VERSION
409
407
410 if b'chunkcachesize' in opts:
408 if b'chunkcachesize' in opts:
411 self._chunkcachesize = opts[b'chunkcachesize']
409 self._chunkcachesize = opts[b'chunkcachesize']
412 if b'maxchainlen' in opts:
410 if b'maxchainlen' in opts:
413 self._maxchainlen = opts[b'maxchainlen']
411 self._maxchainlen = opts[b'maxchainlen']
414 if b'deltabothparents' in opts:
412 if b'deltabothparents' in opts:
415 self._deltabothparents = opts[b'deltabothparents']
413 self._deltabothparents = opts[b'deltabothparents']
416 self._lazydelta = bool(opts.get(b'lazydelta', True))
414 self._lazydelta = bool(opts.get(b'lazydelta', True))
417 self._lazydeltabase = False
415 self._lazydeltabase = False
418 if self._lazydelta:
416 if self._lazydelta:
419 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
420 if b'compengine' in opts:
418 if b'compengine' in opts:
421 self._compengine = opts[b'compengine']
419 self._compengine = opts[b'compengine']
422 if b'zlib.level' in opts:
420 if b'zlib.level' in opts:
423 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
424 if b'zstd.level' in opts:
422 if b'zstd.level' in opts:
425 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
426 if b'maxdeltachainspan' in opts:
424 if b'maxdeltachainspan' in opts:
427 self._maxdeltachainspan = opts[b'maxdeltachainspan']
425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
428 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
429 mmapindexthreshold = opts[b'mmapindexthreshold']
427 mmapindexthreshold = opts[b'mmapindexthreshold']
430 self.hassidedata = bool(opts.get(b'side-data', False))
428 self.hassidedata = bool(opts.get(b'side-data', False))
431 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
432 withsparseread = bool(opts.get(b'with-sparse-read', False))
430 withsparseread = bool(opts.get(b'with-sparse-read', False))
433 # sparse-revlog forces sparse-read
431 # sparse-revlog forces sparse-read
434 self._withsparseread = self._sparserevlog or withsparseread
432 self._withsparseread = self._sparserevlog or withsparseread
435 if b'sparse-read-density-threshold' in opts:
433 if b'sparse-read-density-threshold' in opts:
436 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
437 if b'sparse-read-min-gap-size' in opts:
435 if b'sparse-read-min-gap-size' in opts:
438 self._srmingapsize = opts[b'sparse-read-min-gap-size']
436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
439 if opts.get(b'enableellipsis'):
437 if opts.get(b'enableellipsis'):
440 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
441
439
442 # revlog v0 doesn't have flag processors
440 # revlog v0 doesn't have flag processors
443 for flag, processor in pycompat.iteritems(
441 for flag, processor in pycompat.iteritems(
444 opts.get(b'flagprocessors', {})
442 opts.get(b'flagprocessors', {})
445 ):
443 ):
446 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
447
445
448 if self._chunkcachesize <= 0:
446 if self._chunkcachesize <= 0:
449 raise error.RevlogError(
447 raise error.RevlogError(
450 _(b'revlog chunk cache size %r is not greater than 0')
448 _(b'revlog chunk cache size %r is not greater than 0')
451 % self._chunkcachesize
449 % self._chunkcachesize
452 )
450 )
453 elif self._chunkcachesize & (self._chunkcachesize - 1):
451 elif self._chunkcachesize & (self._chunkcachesize - 1):
454 raise error.RevlogError(
452 raise error.RevlogError(
455 _(b'revlog chunk cache size %r is not a power of 2')
453 _(b'revlog chunk cache size %r is not a power of 2')
456 % self._chunkcachesize
454 % self._chunkcachesize
457 )
455 )
458 force_nodemap = opts.get(b'devel-force-nodemap', False)
456 force_nodemap = opts.get(b'devel-force-nodemap', False)
459 return newversionflags, mmapindexthreshold, force_nodemap
457 return newversionflags, mmapindexthreshold, force_nodemap
460
458
461 def _loadindex(self):
459 def _loadindex(self):
462
460
463 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
464 indexdata = b''
462 indexdata = b''
465 self._initempty = True
463 self._initempty = True
466 try:
464 try:
467 with self._indexfp() as f:
465 with self._indexfp() as f:
468 if (
466 if (
469 mmapindexthreshold is not None
467 mmapindexthreshold is not None
470 and self.opener.fstat(f).st_size >= mmapindexthreshold
468 and self.opener.fstat(f).st_size >= mmapindexthreshold
471 ):
469 ):
472 # TODO: should .close() to release resources without
470 # TODO: should .close() to release resources without
473 # relying on Python GC
471 # relying on Python GC
474 indexdata = util.buffer(util.mmapread(f))
472 indexdata = util.buffer(util.mmapread(f))
475 else:
473 else:
476 indexdata = f.read()
474 indexdata = f.read()
477 if len(indexdata) > 0:
475 if len(indexdata) > 0:
478 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
479 self._initempty = False
477 self._initempty = False
480 else:
478 else:
481 versionflags = newversionflags
479 versionflags = newversionflags
482 except IOError as inst:
480 except IOError as inst:
483 if inst.errno != errno.ENOENT:
481 if inst.errno != errno.ENOENT:
484 raise
482 raise
485
483
486 versionflags = newversionflags
484 versionflags = newversionflags
487
485
488 flags = self._format_flags = versionflags & ~0xFFFF
486 flags = self._format_flags = versionflags & ~0xFFFF
489 fmt = self._format_version = versionflags & 0xFFFF
487 fmt = self._format_version = versionflags & 0xFFFF
490
488
491 if fmt == REVLOGV0:
489 if fmt == REVLOGV0:
492 if flags:
490 if flags:
493 raise error.RevlogError(
491 raise error.RevlogError(
494 _(b'unknown flags (%#04x) in version %d revlog %s')
492 _(b'unknown flags (%#04x) in version %d revlog %s')
495 % (flags >> 16, fmt, self._indexfile)
493 % (flags >> 16, fmt, self._indexfile)
496 )
494 )
497
495
498 self._inline = False
496 self._inline = False
499 self._generaldelta = False
497 self._generaldelta = False
500
498
501 elif fmt == REVLOGV1:
499 elif fmt == REVLOGV1:
502 if flags & ~REVLOGV1_FLAGS:
500 if flags & ~REVLOGV1_FLAGS:
503 raise error.RevlogError(
501 raise error.RevlogError(
504 _(b'unknown flags (%#04x) in version %d revlog %s')
502 _(b'unknown flags (%#04x) in version %d revlog %s')
505 % (flags >> 16, fmt, self._indexfile)
503 % (flags >> 16, fmt, self._indexfile)
506 )
504 )
507
505
508 self._inline = versionflags & FLAG_INLINE_DATA
506 self._inline = versionflags & FLAG_INLINE_DATA
509 self._generaldelta = versionflags & FLAG_GENERALDELTA
507 self._generaldelta = versionflags & FLAG_GENERALDELTA
510
508
511 elif fmt == REVLOGV2:
509 elif fmt == REVLOGV2:
512 if flags & ~REVLOGV2_FLAGS:
510 if flags & ~REVLOGV2_FLAGS:
513 raise error.RevlogError(
511 raise error.RevlogError(
514 _(b'unknown flags (%#04x) in version %d revlog %s')
512 _(b'unknown flags (%#04x) in version %d revlog %s')
515 % (flags >> 16, fmt, self._indexfile)
513 % (flags >> 16, fmt, self._indexfile)
516 )
514 )
517
515
518 # There is a bug in the transaction handling when going from an
516 # There is a bug in the transaction handling when going from an
519 # inline revlog to a separate index and data file. Turn it off until
517 # inline revlog to a separate index and data file. Turn it off until
520 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
521 # See issue6485
519 # See issue6485
522 self._inline = False
520 self._inline = False
523 # generaldelta implied by version 2 revlogs.
521 # generaldelta implied by version 2 revlogs.
524 self._generaldelta = True
522 self._generaldelta = True
525
523
526 else:
524 else:
527 raise error.RevlogError(
525 raise error.RevlogError(
528 _(b'unknown version (%d) in revlog %s') % (fmt, self._indexfile)
526 _(b'unknown version (%d) in revlog %s') % (fmt, self._indexfile)
529 )
527 )
530
528
531 self.nodeconstants = sha1nodeconstants
529 self.nodeconstants = sha1nodeconstants
532 self.nullid = self.nodeconstants.nullid
530 self.nullid = self.nodeconstants.nullid
533
531
534 # sparse-revlog can't be on without general-delta (issue6056)
532 # sparse-revlog can't be on without general-delta (issue6056)
535 if not self._generaldelta:
533 if not self._generaldelta:
536 self._sparserevlog = False
534 self._sparserevlog = False
537
535
538 self._storedeltachains = True
536 self._storedeltachains = True
539
537
540 devel_nodemap = (
538 devel_nodemap = (
541 self.nodemap_file
539 self.nodemap_file
542 and force_nodemap
540 and force_nodemap
543 and parse_index_v1_nodemap is not None
541 and parse_index_v1_nodemap is not None
544 )
542 )
545
543
546 use_rust_index = False
544 use_rust_index = False
547 if rustrevlog is not None:
545 if rustrevlog is not None:
548 if self.nodemap_file is not None:
546 if self.nodemap_file is not None:
549 use_rust_index = True
547 use_rust_index = True
550 else:
548 else:
551 use_rust_index = self.opener.options.get(b'rust.index')
549 use_rust_index = self.opener.options.get(b'rust.index')
552
550
553 self._parse_index = parse_index_v1
551 self._parse_index = parse_index_v1
554 if self._format_version == REVLOGV0:
552 if self._format_version == REVLOGV0:
555 self._parse_index = revlogv0.parse_index_v0
553 self._parse_index = revlogv0.parse_index_v0
556 elif fmt == REVLOGV2:
554 elif fmt == REVLOGV2:
557 self._parse_index = parse_index_v2
555 self._parse_index = parse_index_v2
558 elif devel_nodemap:
556 elif devel_nodemap:
559 self._parse_index = parse_index_v1_nodemap
557 self._parse_index = parse_index_v1_nodemap
560 elif use_rust_index:
558 elif use_rust_index:
561 self._parse_index = parse_index_v1_mixed
559 self._parse_index = parse_index_v1_mixed
562 try:
560 try:
563 d = self._parse_index(indexdata, self._inline)
561 d = self._parse_index(indexdata, self._inline)
564 index, _chunkcache = d
562 index, _chunkcache = d
565 use_nodemap = (
563 use_nodemap = (
566 not self._inline
564 not self._inline
567 and self.nodemap_file is not None
565 and self.nodemap_file is not None
568 and util.safehasattr(index, 'update_nodemap_data')
566 and util.safehasattr(index, 'update_nodemap_data')
569 )
567 )
570 if use_nodemap:
568 if use_nodemap:
571 nodemap_data = nodemaputil.persisted_data(self)
569 nodemap_data = nodemaputil.persisted_data(self)
572 if nodemap_data is not None:
570 if nodemap_data is not None:
573 docket = nodemap_data[0]
571 docket = nodemap_data[0]
574 if (
572 if (
575 len(d[0]) > docket.tip_rev
573 len(d[0]) > docket.tip_rev
576 and d[0][docket.tip_rev][7] == docket.tip_node
574 and d[0][docket.tip_rev][7] == docket.tip_node
577 ):
575 ):
578 # no changelog tampering
576 # no changelog tampering
579 self._nodemap_docket = docket
577 self._nodemap_docket = docket
580 index.update_nodemap_data(*nodemap_data)
578 index.update_nodemap_data(*nodemap_data)
581 except (ValueError, IndexError):
579 except (ValueError, IndexError):
582 raise error.RevlogError(
580 raise error.RevlogError(
583 _(b"index %s is corrupted") % self._indexfile
581 _(b"index %s is corrupted") % self._indexfile
584 )
582 )
585 self.index, self._chunkcache = d
583 self.index, self._chunkcache = d
586 if not self._chunkcache:
584 if not self._chunkcache:
587 self._chunkclear()
585 self._chunkclear()
588 # revnum -> (chain-length, sum-delta-length)
586 # revnum -> (chain-length, sum-delta-length)
589 self._chaininfocache = util.lrucachedict(500)
587 self._chaininfocache = util.lrucachedict(500)
590 # revlog header -> revlog compressor
588 # revlog header -> revlog compressor
591 self._decompressors = {}
589 self._decompressors = {}
592
590
593 @util.propertycache
591 @util.propertycache
594 def revlog_kind(self):
592 def revlog_kind(self):
595 return self.target[0]
593 return self.target[0]
596
594
597 @util.propertycache
595 @util.propertycache
598 def _compressor(self):
596 def _compressor(self):
599 engine = util.compengines[self._compengine]
597 engine = util.compengines[self._compengine]
600 return engine.revlogcompressor(self._compengineopts)
598 return engine.revlogcompressor(self._compengineopts)
601
599
602 def _indexfp(self, mode=b'r'):
600 def _indexfp(self, mode=b'r'):
603 """file object for the revlog's index file"""
601 """file object for the revlog's index file"""
604 args = {'mode': mode}
602 args = {'mode': mode}
605 if mode != b'r':
603 if mode != b'r':
606 args['checkambig'] = self._checkambig
604 args['checkambig'] = self._checkambig
607 if mode == b'w':
605 if mode == b'w':
608 args['atomictemp'] = True
606 args['atomictemp'] = True
609 return self.opener(self._indexfile, **args)
607 return self.opener(self._indexfile, **args)
610
608
611 def _datafp(self, mode=b'r'):
609 def _datafp(self, mode=b'r'):
612 """file object for the revlog's data file"""
610 """file object for the revlog's data file"""
613 return self.opener(self._datafile, mode=mode)
611 return self.opener(self._datafile, mode=mode)
614
612
615 @contextlib.contextmanager
613 @contextlib.contextmanager
616 def _datareadfp(self, existingfp=None):
614 def _datareadfp(self, existingfp=None):
617 """file object suitable to read data"""
615 """file object suitable to read data"""
618 # Use explicit file handle, if given.
616 # Use explicit file handle, if given.
619 if existingfp is not None:
617 if existingfp is not None:
620 yield existingfp
618 yield existingfp
621
619
622 # Use a file handle being actively used for writes, if available.
620 # Use a file handle being actively used for writes, if available.
623 # There is some danger to doing this because reads will seek the
621 # There is some danger to doing this because reads will seek the
624 # file. However, _writeentry() performs a SEEK_END before all writes,
622 # file. However, _writeentry() performs a SEEK_END before all writes,
625 # so we should be safe.
623 # so we should be safe.
626 elif self._writinghandles:
624 elif self._writinghandles:
627 if self._inline:
625 if self._inline:
628 yield self._writinghandles[0]
626 yield self._writinghandles[0]
629 else:
627 else:
630 yield self._writinghandles[1]
628 yield self._writinghandles[1]
631
629
632 # Otherwise open a new file handle.
630 # Otherwise open a new file handle.
633 else:
631 else:
634 if self._inline:
632 if self._inline:
635 func = self._indexfp
633 func = self._indexfp
636 else:
634 else:
637 func = self._datafp
635 func = self._datafp
638 with func() as fp:
636 with func() as fp:
639 yield fp
637 yield fp
640
638
641 def tiprev(self):
639 def tiprev(self):
642 return len(self.index) - 1
640 return len(self.index) - 1
643
641
644 def tip(self):
642 def tip(self):
645 return self.node(self.tiprev())
643 return self.node(self.tiprev())
646
644
647 def __contains__(self, rev):
645 def __contains__(self, rev):
648 return 0 <= rev < len(self)
646 return 0 <= rev < len(self)
649
647
650 def __len__(self):
648 def __len__(self):
651 return len(self.index)
649 return len(self.index)
652
650
653 def __iter__(self):
651 def __iter__(self):
654 return iter(pycompat.xrange(len(self)))
652 return iter(pycompat.xrange(len(self)))
655
653
656 def revs(self, start=0, stop=None):
654 def revs(self, start=0, stop=None):
657 """iterate over all rev in this revlog (from start to stop)"""
655 """iterate over all rev in this revlog (from start to stop)"""
658 return storageutil.iterrevs(len(self), start=start, stop=stop)
656 return storageutil.iterrevs(len(self), start=start, stop=stop)
659
657
660 @property
658 @property
661 def nodemap(self):
659 def nodemap(self):
662 msg = (
660 msg = (
663 b"revlog.nodemap is deprecated, "
661 b"revlog.nodemap is deprecated, "
664 b"use revlog.index.[has_node|rev|get_rev]"
662 b"use revlog.index.[has_node|rev|get_rev]"
665 )
663 )
666 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
664 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
667 return self.index.nodemap
665 return self.index.nodemap
668
666
669 @property
667 @property
670 def _nodecache(self):
668 def _nodecache(self):
671 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
669 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
672 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
670 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
673 return self.index.nodemap
671 return self.index.nodemap
674
672
675 def hasnode(self, node):
673 def hasnode(self, node):
676 try:
674 try:
677 self.rev(node)
675 self.rev(node)
678 return True
676 return True
679 except KeyError:
677 except KeyError:
680 return False
678 return False
681
679
682 def candelta(self, baserev, rev):
680 def candelta(self, baserev, rev):
683 """whether two revisions (baserev, rev) can be delta-ed or not"""
681 """whether two revisions (baserev, rev) can be delta-ed or not"""
684 # Disable delta if either rev requires a content-changing flag
682 # Disable delta if either rev requires a content-changing flag
685 # processor (ex. LFS). This is because such flag processor can alter
683 # processor (ex. LFS). This is because such flag processor can alter
686 # the rawtext content that the delta will be based on, and two clients
684 # the rawtext content that the delta will be based on, and two clients
687 # could have a same revlog node with different flags (i.e. different
685 # could have a same revlog node with different flags (i.e. different
688 # rawtext contents) and the delta could be incompatible.
686 # rawtext contents) and the delta could be incompatible.
689 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
687 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
690 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
688 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
691 ):
689 ):
692 return False
690 return False
693 return True
691 return True
694
692
695 def update_caches(self, transaction):
693 def update_caches(self, transaction):
696 if self.nodemap_file is not None:
694 if self.nodemap_file is not None:
697 if transaction is None:
695 if transaction is None:
698 nodemaputil.update_persistent_nodemap(self)
696 nodemaputil.update_persistent_nodemap(self)
699 else:
697 else:
700 nodemaputil.setup_persistent_nodemap(transaction, self)
698 nodemaputil.setup_persistent_nodemap(transaction, self)
701
699
702 def clearcaches(self):
700 def clearcaches(self):
703 self._revisioncache = None
701 self._revisioncache = None
704 self._chainbasecache.clear()
702 self._chainbasecache.clear()
705 self._chunkcache = (0, b'')
703 self._chunkcache = (0, b'')
706 self._pcache = {}
704 self._pcache = {}
707 self._nodemap_docket = None
705 self._nodemap_docket = None
708 self.index.clearcaches()
706 self.index.clearcaches()
709 # The python code is the one responsible for validating the docket, we
707 # The python code is the one responsible for validating the docket, we
710 # end up having to refresh it here.
708 # end up having to refresh it here.
711 use_nodemap = (
709 use_nodemap = (
712 not self._inline
710 not self._inline
713 and self.nodemap_file is not None
711 and self.nodemap_file is not None
714 and util.safehasattr(self.index, 'update_nodemap_data')
712 and util.safehasattr(self.index, 'update_nodemap_data')
715 )
713 )
716 if use_nodemap:
714 if use_nodemap:
717 nodemap_data = nodemaputil.persisted_data(self)
715 nodemap_data = nodemaputil.persisted_data(self)
718 if nodemap_data is not None:
716 if nodemap_data is not None:
719 self._nodemap_docket = nodemap_data[0]
717 self._nodemap_docket = nodemap_data[0]
720 self.index.update_nodemap_data(*nodemap_data)
718 self.index.update_nodemap_data(*nodemap_data)
721
719
722 def rev(self, node):
720 def rev(self, node):
723 try:
721 try:
724 return self.index.rev(node)
722 return self.index.rev(node)
725 except TypeError:
723 except TypeError:
726 raise
724 raise
727 except error.RevlogError:
725 except error.RevlogError:
728 # parsers.c radix tree lookup failed
726 # parsers.c radix tree lookup failed
729 if (
727 if (
730 node == self.nodeconstants.wdirid
728 node == self.nodeconstants.wdirid
731 or node in self.nodeconstants.wdirfilenodeids
729 or node in self.nodeconstants.wdirfilenodeids
732 ):
730 ):
733 raise error.WdirUnsupported
731 raise error.WdirUnsupported
734 raise error.LookupError(node, self._indexfile, _(b'no node'))
732 raise error.LookupError(node, self._indexfile, _(b'no node'))
735
733
736 # Accessors for index entries.
734 # Accessors for index entries.
737
735
738 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
736 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
739 # are flags.
737 # are flags.
740 def start(self, rev):
738 def start(self, rev):
741 return int(self.index[rev][0] >> 16)
739 return int(self.index[rev][0] >> 16)
742
740
743 def flags(self, rev):
741 def flags(self, rev):
744 return self.index[rev][0] & 0xFFFF
742 return self.index[rev][0] & 0xFFFF
745
743
746 def length(self, rev):
744 def length(self, rev):
747 return self.index[rev][1]
745 return self.index[rev][1]
748
746
749 def sidedata_length(self, rev):
747 def sidedata_length(self, rev):
750 if not self.hassidedata:
748 if not self.hassidedata:
751 return 0
749 return 0
752 return self.index[rev][9]
750 return self.index[rev][9]
753
751
754 def rawsize(self, rev):
752 def rawsize(self, rev):
755 """return the length of the uncompressed text for a given revision"""
753 """return the length of the uncompressed text for a given revision"""
756 l = self.index[rev][2]
754 l = self.index[rev][2]
757 if l >= 0:
755 if l >= 0:
758 return l
756 return l
759
757
760 t = self.rawdata(rev)
758 t = self.rawdata(rev)
761 return len(t)
759 return len(t)
762
760
763 def size(self, rev):
761 def size(self, rev):
764 """length of non-raw text (processed by a "read" flag processor)"""
762 """length of non-raw text (processed by a "read" flag processor)"""
765 # fast path: if no "read" flag processor could change the content,
763 # fast path: if no "read" flag processor could change the content,
766 # size is rawsize. note: ELLIPSIS is known to not change the content.
764 # size is rawsize. note: ELLIPSIS is known to not change the content.
767 flags = self.flags(rev)
765 flags = self.flags(rev)
768 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
766 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
769 return self.rawsize(rev)
767 return self.rawsize(rev)
770
768
771 return len(self.revision(rev, raw=False))
769 return len(self.revision(rev, raw=False))
772
770
773 def chainbase(self, rev):
771 def chainbase(self, rev):
774 base = self._chainbasecache.get(rev)
772 base = self._chainbasecache.get(rev)
775 if base is not None:
773 if base is not None:
776 return base
774 return base
777
775
778 index = self.index
776 index = self.index
779 iterrev = rev
777 iterrev = rev
780 base = index[iterrev][3]
778 base = index[iterrev][3]
781 while base != iterrev:
779 while base != iterrev:
782 iterrev = base
780 iterrev = base
783 base = index[iterrev][3]
781 base = index[iterrev][3]
784
782
785 self._chainbasecache[rev] = base
783 self._chainbasecache[rev] = base
786 return base
784 return base
787
785
788 def linkrev(self, rev):
786 def linkrev(self, rev):
789 return self.index[rev][4]
787 return self.index[rev][4]
790
788
791 def parentrevs(self, rev):
789 def parentrevs(self, rev):
792 try:
790 try:
793 entry = self.index[rev]
791 entry = self.index[rev]
794 except IndexError:
792 except IndexError:
795 if rev == wdirrev:
793 if rev == wdirrev:
796 raise error.WdirUnsupported
794 raise error.WdirUnsupported
797 raise
795 raise
798 if entry[5] == nullrev:
796 if entry[5] == nullrev:
799 return entry[6], entry[5]
797 return entry[6], entry[5]
800 else:
798 else:
801 return entry[5], entry[6]
799 return entry[5], entry[6]
802
800
803 # fast parentrevs(rev) where rev isn't filtered
801 # fast parentrevs(rev) where rev isn't filtered
804 _uncheckedparentrevs = parentrevs
802 _uncheckedparentrevs = parentrevs
805
803
806 def node(self, rev):
804 def node(self, rev):
807 try:
805 try:
808 return self.index[rev][7]
806 return self.index[rev][7]
809 except IndexError:
807 except IndexError:
810 if rev == wdirrev:
808 if rev == wdirrev:
811 raise error.WdirUnsupported
809 raise error.WdirUnsupported
812 raise
810 raise
813
811
814 # Derived from index values.
812 # Derived from index values.
815
813
816 def end(self, rev):
814 def end(self, rev):
817 return self.start(rev) + self.length(rev)
815 return self.start(rev) + self.length(rev)
818
816
819 def parents(self, node):
817 def parents(self, node):
820 i = self.index
818 i = self.index
821 d = i[self.rev(node)]
819 d = i[self.rev(node)]
822 # inline node() to avoid function call overhead
820 # inline node() to avoid function call overhead
823 if d[5] == self.nullid:
821 if d[5] == self.nullid:
824 return i[d[6]][7], i[d[5]][7]
822 return i[d[6]][7], i[d[5]][7]
825 else:
823 else:
826 return i[d[5]][7], i[d[6]][7]
824 return i[d[5]][7], i[d[6]][7]
827
825
828 def chainlen(self, rev):
826 def chainlen(self, rev):
829 return self._chaininfo(rev)[0]
827 return self._chaininfo(rev)[0]
830
828
831 def _chaininfo(self, rev):
829 def _chaininfo(self, rev):
832 chaininfocache = self._chaininfocache
830 chaininfocache = self._chaininfocache
833 if rev in chaininfocache:
831 if rev in chaininfocache:
834 return chaininfocache[rev]
832 return chaininfocache[rev]
835 index = self.index
833 index = self.index
836 generaldelta = self._generaldelta
834 generaldelta = self._generaldelta
837 iterrev = rev
835 iterrev = rev
838 e = index[iterrev]
836 e = index[iterrev]
839 clen = 0
837 clen = 0
840 compresseddeltalen = 0
838 compresseddeltalen = 0
841 while iterrev != e[3]:
839 while iterrev != e[3]:
842 clen += 1
840 clen += 1
843 compresseddeltalen += e[1]
841 compresseddeltalen += e[1]
844 if generaldelta:
842 if generaldelta:
845 iterrev = e[3]
843 iterrev = e[3]
846 else:
844 else:
847 iterrev -= 1
845 iterrev -= 1
848 if iterrev in chaininfocache:
846 if iterrev in chaininfocache:
849 t = chaininfocache[iterrev]
847 t = chaininfocache[iterrev]
850 clen += t[0]
848 clen += t[0]
851 compresseddeltalen += t[1]
849 compresseddeltalen += t[1]
852 break
850 break
853 e = index[iterrev]
851 e = index[iterrev]
854 else:
852 else:
855 # Add text length of base since decompressing that also takes
853 # Add text length of base since decompressing that also takes
856 # work. For cache hits the length is already included.
854 # work. For cache hits the length is already included.
857 compresseddeltalen += e[1]
855 compresseddeltalen += e[1]
858 r = (clen, compresseddeltalen)
856 r = (clen, compresseddeltalen)
859 chaininfocache[rev] = r
857 chaininfocache[rev] = r
860 return r
858 return r
861
859
862 def _deltachain(self, rev, stoprev=None):
860 def _deltachain(self, rev, stoprev=None):
863 """Obtain the delta chain for a revision.
861 """Obtain the delta chain for a revision.
864
862
865 ``stoprev`` specifies a revision to stop at. If not specified, we
863 ``stoprev`` specifies a revision to stop at. If not specified, we
866 stop at the base of the chain.
864 stop at the base of the chain.
867
865
868 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
866 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
869 revs in ascending order and ``stopped`` is a bool indicating whether
867 revs in ascending order and ``stopped`` is a bool indicating whether
870 ``stoprev`` was hit.
868 ``stoprev`` was hit.
871 """
869 """
872 # Try C implementation.
870 # Try C implementation.
873 try:
871 try:
874 return self.index.deltachain(rev, stoprev, self._generaldelta)
872 return self.index.deltachain(rev, stoprev, self._generaldelta)
875 except AttributeError:
873 except AttributeError:
876 pass
874 pass
877
875
878 chain = []
876 chain = []
879
877
880 # Alias to prevent attribute lookup in tight loop.
878 # Alias to prevent attribute lookup in tight loop.
881 index = self.index
879 index = self.index
882 generaldelta = self._generaldelta
880 generaldelta = self._generaldelta
883
881
884 iterrev = rev
882 iterrev = rev
885 e = index[iterrev]
883 e = index[iterrev]
886 while iterrev != e[3] and iterrev != stoprev:
884 while iterrev != e[3] and iterrev != stoprev:
887 chain.append(iterrev)
885 chain.append(iterrev)
888 if generaldelta:
886 if generaldelta:
889 iterrev = e[3]
887 iterrev = e[3]
890 else:
888 else:
891 iterrev -= 1
889 iterrev -= 1
892 e = index[iterrev]
890 e = index[iterrev]
893
891
894 if iterrev == stoprev:
892 if iterrev == stoprev:
895 stopped = True
893 stopped = True
896 else:
894 else:
897 chain.append(iterrev)
895 chain.append(iterrev)
898 stopped = False
896 stopped = False
899
897
900 chain.reverse()
898 chain.reverse()
901 return chain, stopped
899 return chain, stopped
902
900
903 def ancestors(self, revs, stoprev=0, inclusive=False):
901 def ancestors(self, revs, stoprev=0, inclusive=False):
904 """Generate the ancestors of 'revs' in reverse revision order.
902 """Generate the ancestors of 'revs' in reverse revision order.
905 Does not generate revs lower than stoprev.
903 Does not generate revs lower than stoprev.
906
904
907 See the documentation for ancestor.lazyancestors for more details."""
905 See the documentation for ancestor.lazyancestors for more details."""
908
906
909 # first, make sure start revisions aren't filtered
907 # first, make sure start revisions aren't filtered
910 revs = list(revs)
908 revs = list(revs)
911 checkrev = self.node
909 checkrev = self.node
912 for r in revs:
910 for r in revs:
913 checkrev(r)
911 checkrev(r)
914 # and we're sure ancestors aren't filtered as well
912 # and we're sure ancestors aren't filtered as well
915
913
916 if rustancestor is not None:
914 if rustancestor is not None:
917 lazyancestors = rustancestor.LazyAncestors
915 lazyancestors = rustancestor.LazyAncestors
918 arg = self.index
916 arg = self.index
919 else:
917 else:
920 lazyancestors = ancestor.lazyancestors
918 lazyancestors = ancestor.lazyancestors
921 arg = self._uncheckedparentrevs
919 arg = self._uncheckedparentrevs
922 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
920 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
923
921
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
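
The core of findcommonmissing is the breadth-first walk at the end: anything reachable from 'heads' that is not already in 'has' is missing. The same walk in isolation, assuming a toy parent map (nullrev is placed in 'has' so the walk never escapes past the root):

import collections

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}

def missingrevs(has, heads):
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in parents[r]:
            if p not in has:
                visit.append(p)
    return sorted(missing)

print(missingrevs({-1, 0, 1}, [2, 3]))  # -> [2, 3]
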
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
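
In practice these helpers are reached through a repository's changelog (a revlog subclass). A hedged usage sketch; the repository path is hypothetical:

from mercurial import hg, ui as uimod

repo = hg.repository(uimod.ui.load(), b'/path/to/repo')  # hypothetical path
cl = repo.changelog
# Nodes reachable from the tip that are not ancestors of revision 0:
missing = cl.findmissing(common=[cl.node(0)], heads=[cl.node(len(cl) - 1)])
print(len(missing))
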
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
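
How the three return values relate, sketched against the same hypothetical repository: 'nodes' comes back in revision-number (hence topological) order, and 'outroots'/'outheads' are the subsets of the inputs that survived the reachability filtering.

from mercurial import hg, ui as uimod

cl = hg.repository(uimod.ui.load(), b'/path/to/repo').changelog
root, head = cl.node(0), cl.node(len(cl) - 1)
nodes, outroots, outheads = cl.nodesbetween(roots=[root], heads=[head])
print(len(nodes), outroots == [root], outheads == [head])
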
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
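
The marking trick above in isolation: every revision starts out as a candidate head, and each revision then clears its parents. The real code sizes 'ishead' at count + 1 so that a nullrev (-1) parent harmlessly lands in the extra slot; this sketch guards explicitly instead:

parents = [(-1, -1), (0, -1), (1, -1), (1, -1)]  # toy index: rev -> (p1, p2)
ishead = [1] * len(parents)
for r, (p1, p2) in enumerate(parents):
    for p in (p1, p2):
        if p >= 0:
            ishead[p] = 0
print([r for r, v in enumerate(ishead) if v])  # -> [2, 3]
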
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
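
The early exits in isancestorrev lean on revlog numbering: a parent always has a smaller revision number than its child, so a > b can never be an ancestor relationship. A hedged usage sketch (hypothetical repository path):

from mercurial import hg, ui as uimod

cl = hg.repository(uimod.ui.load(), b'/path/to/repo').changelog
print(cl.isancestorrev(0, len(cl) - 1))  # the first revision vs. the tip
print(cl.isancestorrev(len(cl) - 1, 0))  # False via the a > b shortcut
                                         # (unless the repo has one revision)
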
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids, as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self._indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self._indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass
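
Both the radix-tree and slow paths must treat an all-'f' prefix specially: it could still be a prefix of the virtual working-directory id (forty 'f' characters), which is never stored in the revlog. For prefixes no longer than the hash, the check reduces to:

def maybewdir(prefix):
    # 'ffff...'.startswith(prefix) is equivalent to the prefix being
    # made of 'f' characters only.
    return all(c == 'f' for c in prefix)

print(maybewdir('ff12'), maybewdir('ffff'))  # -> False True
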
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self._indexfile, _(b'no match found'))
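
A usage sketch covering the identifier forms lookup accepts; the repository path is hypothetical and the 8-character prefix is assumed unambiguous:

from mercurial import hg, ui as uimod
from mercurial.node import hex

cl = hg.repository(uimod.ui.load(), b'/path/to/repo').changelog
tip = cl.lookup(b'-1')                 # str(revision number)
assert cl.lookup(tip) == tip           # full binary nodeid
assert cl.lookup(hex(tip)[:8]) == tip  # hex prefix (assumed unambiguous)
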
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self._indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self._indexfile, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node
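
cmp() avoids retrieving the stored text at all: for the standard SHA-1 revlog format a revision's node is the SHA-1 of its two parent nodes (smaller one first) followed by the text, so rehashing the candidate text suffices. In outline:

import hashlib

def hashrevision(text, p1, p2):
    # node = sha1(min(p1, p2) + max(p1, p2) + text), for SHA-1 revlogs
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()
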
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
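
The bit-masking above rounds the requested range out to the cache window, which only works because the chunk cache size is required to be a power of two. Worked through with illustrative numbers:

cachesize = 65536                      # must be a power of two
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
print(realoffset, reallength)          # -> 65536 65536
assert realoffset <= offset and offset + length <= realoffset + reallength
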
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)
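
The (rev + 1) * entry_size shift exists because an inline revlog has no separate data file: revision chunks are interleaved with the index entries in the .i file, so a logical data offset must skip every index entry written before it. A sketch, assuming the 64-byte v1 entry size:

ENTRY_SIZE = 64  # revlog v1 index entry size, for illustration

def inline_offset(dataoffset, rev):
    # rev + 1 entries precede this revision's data in an inline revlog
    return dataoffset + (rev + 1) * ENTRY_SIZE

print(inline_offset(0, 0))  # -> 64: data of rev 0 starts after its entry
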
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
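
Delta base resolution in outline: with general delta the base revision is stored per index entry; the legacy layout always deltas against the previous revision. A self-contained sketch:

def deltaparent(base, rev, generaldelta):
    if base == rev:                # stored as a full text, no delta
        return -1                  # nullrev
    return base if generaldelta else rev - 1

print(deltaparent(5, 5, True), deltaparent(3, 5, True), deltaparent(3, 5, False))
# -> -1 3 4
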
1715 def issnapshot(self, rev):
1713 def issnapshot(self, rev):
1716 """tells whether rev is a snapshot"""
1714 """tells whether rev is a snapshot"""
1717 if not self._sparserevlog:
1715 if not self._sparserevlog:
1718 return self.deltaparent(rev) == nullrev
1716 return self.deltaparent(rev) == nullrev
1719 elif util.safehasattr(self.index, b'issnapshot'):
1717 elif util.safehasattr(self.index, b'issnapshot'):
1720 # directly assign the method to cache the testing and access
1718 # directly assign the method to cache the testing and access
1721 self.issnapshot = self.index.issnapshot
1719 self.issnapshot = self.index.issnapshot
1722 return self.issnapshot(rev)
1720 return self.issnapshot(rev)
1723 if rev == nullrev:
1721 if rev == nullrev:
1724 return True
1722 return True
1725 entry = self.index[rev]
1723 entry = self.index[rev]
1726 base = entry[3]
1724 base = entry[3]
1727 if base == rev:
1725 if base == rev:
1728 return True
1726 return True
1729 if base == nullrev:
1727 if base == nullrev:
1730 return True
1728 return True
1731 p1 = entry[5]
1729 p1 = entry[5]
1732 p2 = entry[6]
1730 p2 = entry[6]
1733 if base == p1 or base == p2:
1731 if base == p1 or base == p2:
1734 return False
1732 return False
1735 return self.issnapshot(base)
1733 return self.issnapshot(base)
1736
1734
1737 def snapshotdepth(self, rev):
1735 def snapshotdepth(self, rev):
1738 """number of snapshot in the chain before this one"""
1736 """number of snapshot in the chain before this one"""
1739 if not self.issnapshot(rev):
1737 if not self.issnapshot(rev):
1740 raise error.ProgrammingError(b'revision %d not a snapshot')
1738 raise error.ProgrammingError(b'revision %d not a snapshot')
1741 return len(self._deltachain(rev)[0]) - 1
1739 return len(self._deltachain(rev)[0]) - 1
1742
1740
1743 def revdiff(self, rev1, rev2):
1741 def revdiff(self, rev1, rev2):
1744 """return or calculate a delta between two revisions
1742 """return or calculate a delta between two revisions
1745
1743
1746 The delta calculated is in binary form and is intended to be written to
1744 The delta calculated is in binary form and is intended to be written to
1747 revlog data directly. So this function needs raw revision data.
1745 revlog data directly. So this function needs raw revision data.
1748 """
1746 """
1749 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1747 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1750 return bytes(self._chunk(rev2))
1748 return bytes(self._chunk(rev2))
1751
1749
1752 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1750 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1753
1751
1754 def _processflags(self, text, flags, operation, raw=False):
1752 def _processflags(self, text, flags, operation, raw=False):
1755 """deprecated entry point to access flag processors"""
1753 """deprecated entry point to access flag processors"""
1756 msg = b'_processflag(...) use the specialized variant'
1754 msg = b'_processflag(...) use the specialized variant'
1757 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1755 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1758 if raw:
1756 if raw:
1759 return text, flagutil.processflagsraw(self, text, flags)
1757 return text, flagutil.processflagsraw(self, text, flags)
1760 elif operation == b'read':
1758 elif operation == b'read':
1761 return flagutil.processflagsread(self, text, flags)
1759 return flagutil.processflagsread(self, text, flags)
1762 else: # write operation
1760 else: # write operation
1763 return flagutil.processflagswrite(self, text, flags)
1761 return flagutil.processflagswrite(self, text, flags)
1764
1762
1765 def revision(self, nodeorrev, _df=None, raw=False):
1763 def revision(self, nodeorrev, _df=None, raw=False):
1766 """return an uncompressed revision of a given node or revision
1764 """return an uncompressed revision of a given node or revision
1767 number.
1765 number.
1768
1766
1769 _df - an existing file handle to read from. (internal-only)
1767 _df - an existing file handle to read from. (internal-only)
1770 raw - an optional argument specifying if the revision data is to be
1768 raw - an optional argument specifying if the revision data is to be
1771 treated as raw data when applying flag transforms. 'raw' should be set
1769 treated as raw data when applying flag transforms. 'raw' should be set
1772 to True when generating changegroups or in debug commands.
1770 to True when generating changegroups or in debug commands.
1773 """
1771 """
1774 if raw:
1772 if raw:
1775 msg = (
1773 msg = (
1776 b'revlog.revision(..., raw=True) is deprecated, '
1774 b'revlog.revision(..., raw=True) is deprecated, '
1777 b'use revlog.rawdata(...)'
1775 b'use revlog.rawdata(...)'
1778 )
1776 )
1779 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1777 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1780 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1778 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1781
1779
1782 def sidedata(self, nodeorrev, _df=None):
1780 def sidedata(self, nodeorrev, _df=None):
1783 """a map of extra data related to the changeset but not part of the hash
1781 """a map of extra data related to the changeset but not part of the hash
1784
1782
1785 This function currently return a dictionary. However, more advanced
1783 This function currently return a dictionary. However, more advanced
1786 mapping object will likely be used in the future for a more
1784 mapping object will likely be used in the future for a more
1787 efficient/lazy code.
1785 efficient/lazy code.
1788 """
1786 """
1789 return self._revisiondata(nodeorrev, _df)[1]
1787 return self._revisiondata(nodeorrev, _df)[1]
1790
1788
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (these usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

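    # Reconstruction sketch (illustrative; uses internal helpers and ignores
    # the cached-base shortcut above): a revision is rebuilt by taking the
    # delta chain's base text and folding every subsequent delta on top of
    # it, which is what mdiff.patches() does.
    #
    #     chain, _stopped = rl._deltachain(rev)
    #     chunks = rl._chunks(chain)
    #     rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])
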
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

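    # Inline layout sketch (illustrative): an inline revlog interleaves each
    # index entry with the chunk it describes inside the .i file, so offsets
    # stored "as if" in a .d file must be shifted by the index entries that
    # precede the data — the `entry_size * (1 + rev)` adjustment above.
    #
    #     data_pos = rl.start(rev) + (rev + 1) * rl.index.entry_size
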
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

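    # Hashing sketch (illustrative; assumes the standard SHA-1 scheme used by
    # storageutil.hashrevisionsha1): the node is the SHA-1 of the two parent
    # nodes in sorted order followed by the text, tying content and history
    # together.
    #
    #     import hashlib
    #     def _sketch_hash(text, p1, p2):
    #         s = hashlib.sha1(min(p1, p2))
    #         s.update(max(p1, p2))
    #         s.update(text)
    #         return s.digest()
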
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self._indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self._indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self._indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
          computed by default as hash(text, p1, p2), however subclasses might
          use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
          multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self._indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self._indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

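    # Usage sketch (illustrative; assumes `rl` is a writable revlog and `tr`
    # an open transaction): adding a revision returns its new revision
    # number, or the existing one when the node is already stored.
    #
    #     rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
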
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

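    # Header convention sketch (illustrative): compress() returns a
    # (header, data) pair. An empty header means the payload already starts
    # with a self-describing byte (e.g. b'x' for zlib, b'\0' for raw data),
    # while b'u' marks data kept uncompressed because compression did not
    # pay off.
    #
    #     header, payload = rl.compress(chunk)
    #     stored = header + payload   # what ends up on disk
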
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self._indexfile
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self._indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

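    # Index entry sketch (illustrative): each entry appended above is a
    # 10-tuple of
    #   (packed offset+flags, compressed length, uncompressed length,
    #    delta base rev, linkrev, p1 rev, p2 rev, node,
    #    sidedata offset, sidedata length)
    # so e[1] is the on-disk chunk size while e[2] is the full text size.
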
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self._indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self._indexfile, isize)
            transaction.add(self._datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self._indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self._indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self._indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty

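    # Delta group sketch (illustrative): each item yielded by ``deltas`` is
    # an 8-tuple; the delta applies against ``deltabase``, which must already
    # be present in the revlog.
    #
    #     for (node, p1, p2, linknode, deltabase,
    #          delta, flags, sidedata) in deltas:
    #         ...
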
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

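    # Truncation sketch (illustrative): for a split revlog the .d file is cut
    # at start(rev) and the .i file at rev * entry_size; for an inline revlog
    # both index entries and data live in the .i file, so the cut point is
    # the sum of the two.
    #
    #     cut = rl.start(rev) + rev * rl.index.entry_size  # inline case
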
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

2688 def clone(
2686 def clone(
2689 self,
2687 self,
2690 tr,
2688 tr,
2691 destrevlog,
2689 destrevlog,
2692 addrevisioncb=None,
2690 addrevisioncb=None,
2693 deltareuse=DELTAREUSESAMEREVS,
2691 deltareuse=DELTAREUSESAMEREVS,
2694 forcedeltabothparents=None,
2692 forcedeltabothparents=None,
2695 sidedata_helpers=None,
2693 sidedata_helpers=None,
2696 ):
2694 ):
2697 """Copy this revlog to another, possibly with format changes.
2695 """Copy this revlog to another, possibly with format changes.
2698
2696
2699 The destination revlog will contain the same revisions and nodes.
2697 The destination revlog will contain the same revisions and nodes.
2700 However, it may not be bit-for-bit identical due to e.g. delta encoding
2698 However, it may not be bit-for-bit identical due to e.g. delta encoding
2701 differences.
2699 differences.
2702
2700
2703 The ``deltareuse`` argument control how deltas from the existing revlog
2701 The ``deltareuse`` argument control how deltas from the existing revlog
2704 are preserved in the destination revlog. The argument can have the
2702 are preserved in the destination revlog. The argument can have the
2705 following values:
2703 following values:
2706
2704
2707 DELTAREUSEALWAYS
2705 DELTAREUSEALWAYS
2708 Deltas will always be reused (if possible), even if the destination
2706 Deltas will always be reused (if possible), even if the destination
2709 revlog would not select the same revisions for the delta. This is the
2707 revlog would not select the same revisions for the delta. This is the
2710 fastest mode of operation.
2708 fastest mode of operation.
2711 DELTAREUSESAMEREVS
2709 DELTAREUSESAMEREVS
2712 Deltas will be reused if the destination revlog would pick the same
2710 Deltas will be reused if the destination revlog would pick the same
2713 revisions for the delta. This mode strikes a balance between speed
2711 revisions for the delta. This mode strikes a balance between speed
2714 and optimization.
2712 and optimization.
2715 DELTAREUSENEVER
2713 DELTAREUSENEVER
2716 Deltas will never be reused. This is the slowest mode of execution.
2714 Deltas will never be reused. This is the slowest mode of execution.
2717 This mode can be used to recompute deltas (e.g. if the diff/delta
2715 This mode can be used to recompute deltas (e.g. if the diff/delta
2718 algorithm changes).
2716 algorithm changes).
2719 DELTAREUSEFULLADD
2717 DELTAREUSEFULLADD
2720 Revision will be re-added as if their were new content. This is
2718 Revision will be re-added as if their were new content. This is
2721 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2719 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2722 eg: large file detection and handling.
2720 eg: large file detection and handling.
2723
2721
2724 Delta computation can be slow, so the choice of delta reuse policy can
2722 Delta computation can be slow, so the choice of delta reuse policy can
2725 significantly affect run time.
2723 significantly affect run time.
2726
2724
2727 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2725 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2728 two extremes. Deltas will be reused if they are appropriate. But if the
2726 two extremes. Deltas will be reused if they are appropriate. But if the
2729 delta could choose a better revision, it will do so. This means if you
2727 delta could choose a better revision, it will do so. This means if you
2730 are converting a non-generaldelta revlog to a generaldelta revlog,
2728 are converting a non-generaldelta revlog to a generaldelta revlog,
2731 deltas will be recomputed if the delta's parent isn't a parent of the
2729 deltas will be recomputed if the delta's parent isn't a parent of the
2732 revision.
2730 revision.
2733
2731
2734 In addition to the delta policy, the ``forcedeltabothparents``
2732 In addition to the delta policy, the ``forcedeltabothparents``
2735 argument controls whether to force compute deltas against both parents
2733 argument controls whether to force compute deltas against both parents
2736 for merges. By default, the current default is used.
2734 for merges. By default, the current default is used.
2737
2735
2738 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2736 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2739 `sidedata_helpers`.
2737 `sidedata_helpers`.
2740 """
2738 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

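    # Usage sketch (illustrative, not part of the original code): assuming
    # `src` and `dst` are an already-open source/destination revlog pair and
    # `tr` is an active transaction, a conversion that recomputes every delta
    # from scratch would look like:
    #
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
    # while a cheap copy that reuses existing deltas whenever possible would
    # pass `deltareuse=src.DELTAREUSEALWAYS` instead.
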
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                ifh = destrevlog.opener(
                    destrevlog._indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog._datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

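    # Worked example (illustrative): the expression
    # `flags | new_flags[0] & ~new_flags[1]` used above relies on `&` binding
    # tighter than `|`, i.e. it parses as
    # `flags | (new_flags[0] & ~new_flags[1])`. With hypothetical values:
    #
    #     flags = 0b0101                        # current revision flags
    #     to_add, to_remove = 0b0010, 0b0100    # returned by the helpers
    #     flags | to_add & ~to_remove == 0b0111
    #
    # so bits requested by the sidedata helpers are added, while the removal
    # mask, as written, is only applied to the added bits, not to the
    # pre-existing `flags`.
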
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

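    # Illustrative sketch (assumption: this mirrors `storageutil.packmeta`,
    # whose definition is not part of this file): the tombstone written above
    # is the censor reason wrapped in the b'\1\n'-delimited filelog metadata
    # header (see the verifyintegrity comments below):
    #
    #     def _demo_packmeta(meta, text):
    #         lines = [b'%s: %s\n' % (k, meta[k]) for k in sorted(meta)]
    #         return b'\x01\n%s\x01\n%s' % (b''.join(lines), text)
    #
    #     _demo_packmeta({b'censored': b'why'}, b'')
    #     # -> b'\x01\ncensored: why\x01\n'
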
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self._indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

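    # Usage sketch (illustrative): a caller drives verification with a
    # mutable `state` dict and consumes the generator, e.g.:
    #
    #     state = {b'expectedversion': rl._format_version,
    #              b'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         ui.warn(problem.error or problem.warning)
    #
    # where `rl` and `ui` are hypothetical revlog and ui instances; nodes
    # that failed to unpack end up in `state[b'skipread']`.
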
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

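    # Usage sketch (illustrative): callers opt into each potentially
    # expensive piece of information explicitly, e.g.:
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # -> {b'revisionscount': 42, b'storedsize': 123456}
    #
    # `trackedsize` sums the uncompressed (raw) size of every revision while
    # `storedsize` stats the files on disk, so the two can differ widely.
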
    def rewrite_sidedata(self, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only
        # operation). See issue6485.
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that assumes
        # that all index data is written in append mode and is therefore
        # incompatible with the seeked writes done in this method. The use of
        # such "delayed" writing will soon be removed for revlog versions
        # that support sidedata, so for now we only keep this simple assert
        # to highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

        new_entries = []
        # append the new sidedata
        with self._datafp(b'a+') as fp:
            # Maybe this bug still exists, see revlog._writeentry
            fp.seek(0, os.SEEK_END)
            current_offset = fp.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in
                    # the revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                fp.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

        # rewrite the new index entries
        with self._indexfp(b'r+') as fp:
            fp.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                fp.write(packed)
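
    # Layout note (illustrative): in this index format `entry[8]` is the
    # sidedata offset in the data file and `entry[9]` its length, so the
    # method above appends all serialized sidedata at the end of the data
    # file, then patches only the affected index records in place:
    #
    #     fp.seek(startrev * self.index.entry_size)  # first record to patch
    #
    # each record being `entry_size` bytes, rev `r` lives at byte offset
    # `r * entry_size` (with the format header additionally packed in front
    # of the rev 0 entry).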
@@ -1,664 +1,659 b''
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import re
import struct

from ..node import hex

from .. import (
    error,
    util,
)


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)

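# Small usage sketch (illustrative, not part of the original module):
# `NodeMap` behaves like a dict but turns missing keys into RevlogError
# instead of KeyError, which callers rely on for error reporting:
#
#     nm = NodeMap()
#     nm[b'missing']  # raises error.RevlogError(b'unknown node: missing')
#                     # (a plain dict would raise KeyError instead)
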
def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                try:
                    data = util.buffer(util.mmapread(fd, data_length))
                except ValueError:
                    # raised when the read file is too small
                    data = b''
            else:
                data = fd.read(data_length)
    except (IOError, OSError) as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise
    if len(data) < data_length:
        return None
    return docket, data

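# On-disk layout sketch (derived from the constants defined below;
# illustrative): the docket file parsed above is a flat concatenation of
#
#     S_VERSION (">B")      1 byte    must equal ONDISK_VERSION
#     S_HEADER  (">BQQQQ")  33 bytes  uid_size, tip_rev, data_length,
#                                     data_unused, tip_node_size
#     uid       (uid_size bytes)      hex id naming the "-<uid>.nd" data file
#     tip_node  (tip_node_size bytes) node expected at tip_rev
#
# e.g. struct.calcsize(">BQQQQ") == 33, so the fixed-size prefix is 34 bytes.
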
def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # This must run after the changelog finalization; the changelog callbacks
    # use the "cl-" prefix, which sorts before this "nm-" prefix.
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))

class _NoTransaction(object):
    """transaction-like object to update the nodemap outside a transaction"""

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func

    def registertmp(self, *args, **kwargs):
        pass

    def addbackup(self, *args, **kwargs):
        pass

    def add(self, *args, **kwargs):
        pass

    def addabort(self, *args, **kwargs):
        pass

    def _report(self, *args):
        pass

def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal
    transaction setup (eg, `debugupdatecache`).
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)

def delete_nodemap(tr, repo, revlog):
    """Delete nodemap data on disk for a given revlog"""
    if revlog.nodemap_file is None:
        msg = "calling delete_nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)
    repo.svfs.unlink(revlog.nodemap_file)

def persist_nodemap(tr, revlog, pending=False, force=False):
    """Write nodemap data on disk for a given revlog"""
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        if force:
            revlog.nodemap_file = get_nodemap_file(revlog)
        else:
            msg = "calling persist nodemap on a revlog without the feature enabled"
            raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            data = None
        elif new_length <= (new_unused * 10):
            # at least 10% of the data is unused: regenerate fully
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavors of transaction abort do not clean up new files, they
        # simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog.nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old data files in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)

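# Worked example for the incremental-vs-full heuristic above (illustrative):
# `new_length <= (new_unused * 10)` triggers a full regeneration once dead
# bytes reach at least 10% of the file, e.g.:
#
#     new_length, new_unused = 1000, 99    # 9.9% dead -> keep appending
#     new_length, new_unused = 1000, 100   # 10% dead  -> full rewrite
#
# since 1000 <= 990 is false but 1000 <= 1000 is true.
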
### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being used by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

ONDISK_VERSION = 1
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")

ID_SIZE = 8


def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    return hex(os.urandom(ID_SIZE))

class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        #   - When new data are appended, it is preserved.
        #   - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision: if it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect, as some destructive operation
        # could preserve the same tip_rev + tip_node while altering lower
        # revisions. However, multiple other caches have the same
        # vulnerability (eg: branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        #   - a data file shorter than this is corrupted,
        #   - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for a docket using the passed uid"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)

def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    if revlog.nodemap_file.endswith(b'.n.a'):
        prefix = revlog.nodemap_file[:-4]
    else:
        prefix = revlog.nodemap_file[:-2]
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others
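
# Naming sketch (illustrative): for a changelog whose docket is stored as
# `00changelog.n`, the raw data lives next to it as `00changelog-<uid>.nd`,
# where `<uid>` is the 16-hex-digit identifier from `_make_uid()`.
# `_other_rawdata_filepath` matches every `00changelog-[0-9a-f]+.nd` sibling
# except the one the docket currently points at, so that stale files left by
# older processes can be unlinked after the transaction closes.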

### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide performance
# improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a Trie using 4bits-address/16-entries blocks.
# Each revision can be addressed using the shortest unique prefix of its node.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integers, big endian; see S_BLOCK below). Each entry can be
# one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value < -1 -> encoded revision: rev = -(value+2)
#
# See REV_OFFSET and _transform_rev below.
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# extensively used in production.

def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index"""
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )

S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)

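# Worked example (illustrative): with REV_OFFSET == 2 the encoding maps
# rev 0 -> -2, rev 1 -> -3, ..., keeping every encoded revision strictly
# below -1 so it can never collide with NO_ENTRY (-1) or with sub-block
# indexes (>= 0); decoding is the same function applied again.
assert _transform_rev(0) == -2
assert _transform_rev(_transform_rev(13)) == 13  # involution
assert _transform_rev(13) < NO_ENTRY
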

def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))

def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision numbers for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        current_hex = hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, current_hex)
    return root


def _update_trie(index, root, last_rev):
    """update a trie in place with the revisions added since `last_rev`"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        current_hex = hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, current_hex)
    return changed, root

def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
521
519
522
520
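To visualize the collision case: if rev 0 has node ab3... and rev 1 has node
ab9..., the leaf previously stored under digit 'a' must be split into nested
blocks until the prefixes diverge (a hypothetical two-node trie):

    root[0xa] -> Block            # level 0, digit 'a'
      [0xb]   -> Block            # level 1, digit 'b'
        [0x3] -> rev 0            # level 2: digits differ, both are leaves
        [0x9] -> rev 1
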
def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)

def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (__, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block

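Because the walk is post-order, _persist_trie above has already assigned an
id to every child block before it packs the parent, so a parent's serialized
slots can reference child ids directly. A minimal check of the ordering
(illustration only):

    r = Block()
    child = Block()
    r[0x0] = child
    assert list(_walk_trie(r)) == [child, r]
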
def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)

def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)

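The sign of each packed integer therefore encodes its meaning (assuming
NO_ENTRY == -1, as defined earlier in this module):

    NO_ENTRY (-1)   empty slot
    value >= 0      id of a child block
    value <= -2     a revision number, recovered via _transform_rev
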
def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = b"nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    return block, i // S_BLOCK.size

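Note that the root comes out of the loop variable: children are written
before their parents, so the last block in the file is the root. A
round-trip sketch using the toy trie built earlier:

    data = _persist_trie(root)
    parsed_root, last_block_idx = parse_data(data)
    assert set(_all_revisions(parsed_root)) == set(range(len(index)))
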
# debug utility

def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b"  revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, hex(index[r][7]))
            if nm_rev is None:
                msg = b"  revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b"  revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b"  extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret

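This checker backs a debug command; running something like the following
against a repository with a persistent nodemap reports any mismatch between
the revlog index and the stored nodemap (the exact output text may vary by
version):

    $ hg debugnodemap --check
    revision in index: 5005
    revision in nodemap: 5005
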
def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v

def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry

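Lookup consumes one hex digit per level and returns as soon as it reaches a
leaf, i.e. as soon as the prefix is unique; it does not verify the remaining
digits, so callers must compare the full node themselves, exactly as
check_data does above. Continuing the toy example:

    rev = _find_node(root, hex(index[0][7]))
    assert rev == 0
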
-def get_nodemap_file(opener, indexfile):
-    if indexfile.endswith(b'.a'):
-        pending_path = indexfile[:-4] + b".n.a"
-        if opener.exists(pending_path):
-            return pending_path
-        else:
-            return indexfile[:-4] + b".n"
-    else:
-        return indexfile[:-2] + b".n"
+def get_nodemap_file(revlog):
+    if revlog.postfix == b'a':
+        pending_path = revlog.radix + b".n.a"
+        if revlog.opener.exists(pending_path):
+            return pending_path
+    return revlog.radix + b".n"
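
This is the hunk the changeset is about: the nodemap path is now derived
from the revlog's own `radix` and `postfix` attributes instead of slicing
the `.i`/`.i.a` suffix off the index file name by hand. For a revlog whose
radix is, say, b'00changelog' (a hypothetical example), the resulting paths
are:

    normal file:          00changelog.n
    pending transaction:  00changelog.n.a  (used if it exists, else .n)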