##// END OF EJS Templates
revlog: highlight current incompatibility in `rewrite_sidedata`...
marmoute -
r47905:1b33e38d default
parent child Browse files
Show More
@@ -1,3130 +1,3142 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 INDEX_HEADER,
40 INDEX_HEADER,
41 REVLOGV0,
41 REVLOGV0,
42 REVLOGV1,
42 REVLOGV1,
43 REVLOGV1_FLAGS,
43 REVLOGV1_FLAGS,
44 REVLOGV2,
44 REVLOGV2,
45 REVLOGV2_FLAGS,
45 REVLOGV2_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
46 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FORMAT,
47 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_VERSION,
48 REVLOG_DEFAULT_VERSION,
49 )
49 )
50 from .revlogutils.flagutil import (
50 from .revlogutils.flagutil import (
51 REVIDX_DEFAULT_FLAGS,
51 REVIDX_DEFAULT_FLAGS,
52 REVIDX_ELLIPSIS,
52 REVIDX_ELLIPSIS,
53 REVIDX_EXTSTORED,
53 REVIDX_EXTSTORED,
54 REVIDX_FLAGS_ORDER,
54 REVIDX_FLAGS_ORDER,
55 REVIDX_HASCOPIESINFO,
55 REVIDX_HASCOPIESINFO,
56 REVIDX_ISCENSORED,
56 REVIDX_ISCENSORED,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 )
58 )
59 from .thirdparty import attr
59 from .thirdparty import attr
60 from . import (
60 from . import (
61 ancestor,
61 ancestor,
62 dagop,
62 dagop,
63 error,
63 error,
64 mdiff,
64 mdiff,
65 policy,
65 policy,
66 pycompat,
66 pycompat,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .interfaces import (
70 from .interfaces import (
71 repository,
71 repository,
72 util as interfaceutil,
72 util as interfaceutil,
73 )
73 )
74 from .revlogutils import (
74 from .revlogutils import (
75 deltas as deltautil,
75 deltas as deltautil,
76 flagutil,
76 flagutil,
77 nodemap as nodemaputil,
77 nodemap as nodemaputil,
78 revlogv0,
78 revlogv0,
79 sidedata as sidedatautil,
79 sidedata as sidedatautil,
80 )
80 )
81 from .utils import (
81 from .utils import (
82 storageutil,
82 storageutil,
83 stringutil,
83 stringutil,
84 )
84 )
85
85
# Blanked usage of all the names below to prevent pyflakes "unused import"
# complaints.  We need these names available at module level because
# extensions import them from here.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS
106
106
# Optional accelerated implementations: the C index parser and the Rust
# modules; each is None when the corresponding extension is unavailable.
parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
# read size for revlog data chunks (presumably bytes; the consuming chunk
# reader is outside this view — confirm against the data-file access code)
_chunksize = 1048576
118
118
119 # Flag processors for REVIDX_ELLIPSIS.
119 # Flag processors for REVIDX_ELLIPSIS.
120 def ellipsisreadprocessor(rl, text):
120 def ellipsisreadprocessor(rl, text):
121 return text, False
121 return text, False
122
122
123
123
def ellipsiswriteprocessor(rl, text):
    """Write-side flag processor for ellipsis revisions.

    Stores the text as-is; ``False`` means no hash validation.
    """
    validatehash = False
    return text, validatehash
126
126
127
127
def ellipsisrawprocessor(rl, text):
    """Raw-side flag processor for ellipsis revisions.

    Always returns ``False``: the rawtext of an ellipsis revision cannot
    be expected to match its node hash.
    """
    validatehash = False
    return validatehash
130
130
131
131
# The (read, write, raw) flag-processor triple registered for
# REVIDX_ELLIPSIS when ellipsis support is enabled (see _loadindex).
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
137
137
138
138
def offset_type(offset, type):
    """Pack a data *offset* and a 16-bit flag field *type* into one integer.

    The offset occupies the high bits and the flags the low 16 bits.
    Raises ``ValueError`` if *type* has bits outside
    ``flagutil.REVIDX_KNOWN_FLAGS``.
    """
    unknown_bits = type & ~flagutil.REVIDX_KNOWN_FLAGS
    if unknown_bits != 0:
        raise ValueError(b'unknown revlog index flags')
    packed = (int(offset) << 16) | type
    return int(packed)
143
143
144
144
145 def _verify_revision(rl, skipflags, state, node):
145 def _verify_revision(rl, skipflags, state, node):
146 """Verify the integrity of the given revlog ``node`` while providing a hook
146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 point for extensions to influence the operation."""
147 point for extensions to influence the operation."""
148 if skipflags:
148 if skipflags:
149 state[b'skipread'].add(node)
149 state[b'skipread'].add(node)
150 else:
150 else:
151 # Side-effect: read content and verify hash.
151 # Side-effect: read content and verify hash.
152 rl.revision(node)
152 rl.revision(node)
153
153
154
154
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
#
# Either the Rust revlog module is present, or the loaded ``parsers``
# module exposes ``BaseIndexObject`` (the C index implementation).
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)
163
163
164
164
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    textlen: length of the revision fulltext
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
184
184
185
185
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """A revision delta produced by a revlog.

    Implements the ``repository.irevisiondelta`` interface; attribute
    semantics are defined by that interface.
    """

    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)
200
200
201
201
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """A problem found while verifying a revlog.

    Implements ``repository.iverifyproblem``; at most one of ``warning``
    or ``error`` is expected to be set, with ``node`` identifying the
    affected revision when known.
    """

    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
208
208
209
209
def parse_index_v1(data, inline):
    """Parse a version-1 revlog index using the C implementation."""
    idx, chunkcache = parsers.parse_index2(data, inline)
    return idx, chunkcache
214
214
215
215
def parse_index_v2(data, inline):
    """Parse a version-2 revlog index using the C implementation."""
    idx, chunkcache = parsers.parse_index2(data, inline, revlogv2=True)
    return idx, chunkcache
220
220
221
221
# The devel nodemap parser only exists in builds of the C extension that
# provide it; fall back to None so callers can feature-test.
if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        # developer-only variant exercising the persistent nodemap index
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None
231
231
232
232
def parse_index_v1_mixed(data, inline):
    """Parse a v1 index and expose it through the Rust ``MixedIndex``."""
    idx, chunkcache = parse_index_v1(data, inline)
    mixed = rustrevlog.MixedIndex(idx)
    return mixed, chunkcache
236
236
237
237
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF
241
241
242
242
243 class revlog(object):
243 class revlog(object):
244 """
244 """
245 the underlying revision storage object
245 the underlying revision storage object
246
246
247 A revlog consists of two parts, an index and the revision data.
247 A revlog consists of two parts, an index and the revision data.
248
248
249 The index is a file with a fixed record size containing
249 The index is a file with a fixed record size containing
250 information on each revision, including its nodeid (hash), the
250 information on each revision, including its nodeid (hash), the
251 nodeids of its parents, the position and offset of its data within
251 nodeids of its parents, the position and offset of its data within
252 the data file, and the revision it's based on. Finally, each entry
252 the data file, and the revision it's based on. Finally, each entry
253 contains a linkrev entry that can serve as a pointer to external
253 contains a linkrev entry that can serve as a pointer to external
254 data.
254 data.
255
255
256 The revision data itself is a linear collection of data chunks.
256 The revision data itself is a linear collection of data chunks.
257 Each chunk represents a revision and is usually represented as a
257 Each chunk represents a revision and is usually represented as a
258 delta against the previous chunk. To bound lookup time, runs of
258 delta against the previous chunk. To bound lookup time, runs of
259 deltas are limited to about 2 times the length of the original
259 deltas are limited to about 2 times the length of the original
260 version data. This makes retrieval of a version proportional to
260 version data. This makes retrieval of a version proportional to
261 its size, or O(1) relative to the number of revisions.
261 its size, or O(1) relative to the number of revisions.
262
262
263 Both pieces of the revlog are written to in an append-only
263 Both pieces of the revlog are written to in an append-only
264 fashion, which means we never need to rewrite a file to insert or
264 fashion, which means we never need to rewrite a file to insert or
265 remove data, and can use some simple techniques to avoid the need
265 remove data, and can use some simple techniques to avoid the need
266 for locking while reading.
266 for locking while reading.
267
267
268 If checkambig, indexfile is opened with checkambig=True at
268 If checkambig, indexfile is opened with checkambig=True at
269 writing, to avoid file stat ambiguity.
269 writing, to avoid file stat ambiguity.
270
270
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 index will be mmapped rather than read if it is larger than the
272 index will be mmapped rather than read if it is larger than the
273 configured threshold.
273 configured threshold.
274
274
275 If censorable is True, the revlog can have censored revisions.
275 If censorable is True, the revlog can have censored revisions.
276
276
277 If `upperboundcomp` is not None, this is the expected maximal gain from
277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 compression for the data content.
278 compression for the data content.
279
279
280 `concurrencychecker` is an optional function that receives 3 arguments: a
280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 file handle, a filename, and an expected position. It should check whether
281 file handle, a filename, and an expected position. It should check whether
282 the current position in the file handle is valid, and log/warn/fail (by
282 the current position in the file handle is valid, and log/warn/fail (by
283 raising).
283 raising).
284 """
284 """
285
285
286 _flagserrorclass = error.RevlogError
286 _flagserrorclass = error.RevlogError
287
287
    def __init__(
        self,
        opener,
        target,
        indexfile=None,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identify the content stored in
        this revlog. It help the rest of the code to understand what the revlog
        is about without having to resort to heuristic and index filename
        analysis. Note: that this must be reliably be set by normal code, but
        that test, debug, or performance measurement code might not set this to
        accurate value.

        The remaining keyword arguments are described on the class
        docstring: `checkambig`, `mmaplargeindex`, `censorable`,
        `upperboundcomp`, `persistentnodemap` and `concurrencychecker`.
        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        # default data file lives next to the index: "xxx.i" -> "xxx.d"
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.nodemap_file = None
        if persistentnodemap:
            self.nodemap_file = nodemaputil.get_nodemap_file(
                opener, self.indexfile
            )

        self.opener = opener
        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Compression engine defaults; may be overridden from opener
        # options in _loadindex().
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        # Reads the index from disk and finishes configuration; must run
        # after the defaults above since it overrides many of them.
        self._loadindex()

        self._concurrencychecker = concurrencychecker
365
365
    def _loadindex(self):
        """Read and parse the index file, configuring this revlog.

        Applies opener options (delta strategy, compression, sparse-read,
        flag processors), determines the on-disk format version, selects
        the appropriate index parser and loads persistent nodemap data
        when available.  Raises ``error.RevlogError`` on unknown versions,
        unknown flags, invalid options or a corrupted index.
        """
        mmapindexthreshold = None
        opts = self.opener.options

        # Version flags to use if we end up creating a new (empty) revlog.
        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        # Apply tuning options from the opener.
        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        # chunk cache size must be a positive power of two
        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )

        # Read the raw index, mmapping it when large enough and allowed.
        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            # Missing index file: this is a new revlog.
            versionflags = newversionflags

        self.version = versionflags

        # header word: high bits are feature flags, low 16 bits the format
        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            # There is a bug in the transaction handling when going from an
            # inline revlog to a separate index and data file. Turn it off until
            # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
            # See issue6485
            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self.nodemap_file
            and opts.get(b'devel-force-nodemap', False)
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self.nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        # Pick the parser matching the detected format / requested backend.
        self._parse_index = parse_index_v1
        if self.version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif fmt == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(indexdata, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self.nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}
560
560
561 @util.propertycache
561 @util.propertycache
562 def revlog_kind(self):
562 def revlog_kind(self):
563 return self.target[0]
563 return self.target[0]
564
564
565 @util.propertycache
565 @util.propertycache
566 def _compressor(self):
566 def _compressor(self):
567 engine = util.compengines[self._compengine]
567 engine = util.compengines[self._compengine]
568 return engine.revlogcompressor(self._compengineopts)
568 return engine.revlogcompressor(self._compengineopts)
569
569
570 def _indexfp(self, mode=b'r'):
570 def _indexfp(self, mode=b'r'):
571 """file object for the revlog's index file"""
571 """file object for the revlog's index file"""
572 args = {'mode': mode}
572 args = {'mode': mode}
573 if mode != b'r':
573 if mode != b'r':
574 args['checkambig'] = self._checkambig
574 args['checkambig'] = self._checkambig
575 if mode == b'w':
575 if mode == b'w':
576 args['atomictemp'] = True
576 args['atomictemp'] = True
577 return self.opener(self.indexfile, **args)
577 return self.opener(self.indexfile, **args)
578
578
579 def _datafp(self, mode=b'r'):
579 def _datafp(self, mode=b'r'):
580 """file object for the revlog's data file"""
580 """file object for the revlog's data file"""
581 return self.opener(self.datafile, mode=mode)
581 return self.opener(self.datafile, mode=mode)
582
582
583 @contextlib.contextmanager
583 @contextlib.contextmanager
584 def _datareadfp(self, existingfp=None):
584 def _datareadfp(self, existingfp=None):
585 """file object suitable to read data"""
585 """file object suitable to read data"""
586 # Use explicit file handle, if given.
586 # Use explicit file handle, if given.
587 if existingfp is not None:
587 if existingfp is not None:
588 yield existingfp
588 yield existingfp
589
589
590 # Use a file handle being actively used for writes, if available.
590 # Use a file handle being actively used for writes, if available.
591 # There is some danger to doing this because reads will seek the
591 # There is some danger to doing this because reads will seek the
592 # file. However, _writeentry() performs a SEEK_END before all writes,
592 # file. However, _writeentry() performs a SEEK_END before all writes,
593 # so we should be safe.
593 # so we should be safe.
594 elif self._writinghandles:
594 elif self._writinghandles:
595 if self._inline:
595 if self._inline:
596 yield self._writinghandles[0]
596 yield self._writinghandles[0]
597 else:
597 else:
598 yield self._writinghandles[1]
598 yield self._writinghandles[1]
599
599
600 # Otherwise open a new file handle.
600 # Otherwise open a new file handle.
601 else:
601 else:
602 if self._inline:
602 if self._inline:
603 func = self._indexfp
603 func = self._indexfp
604 else:
604 else:
605 func = self._datafp
605 func = self._datafp
606 with func() as fp:
606 with func() as fp:
607 yield fp
607 yield fp
608
608
609 def tiprev(self):
609 def tiprev(self):
610 return len(self.index) - 1
610 return len(self.index) - 1
611
611
612 def tip(self):
612 def tip(self):
613 return self.node(self.tiprev())
613 return self.node(self.tiprev())
614
614
615 def __contains__(self, rev):
615 def __contains__(self, rev):
616 return 0 <= rev < len(self)
616 return 0 <= rev < len(self)
617
617
618 def __len__(self):
618 def __len__(self):
619 return len(self.index)
619 return len(self.index)
620
620
621 def __iter__(self):
621 def __iter__(self):
622 return iter(pycompat.xrange(len(self)))
622 return iter(pycompat.xrange(len(self)))
623
623
624 def revs(self, start=0, stop=None):
624 def revs(self, start=0, stop=None):
625 """iterate over all rev in this revlog (from start to stop)"""
625 """iterate over all rev in this revlog (from start to stop)"""
626 return storageutil.iterrevs(len(self), start=start, stop=stop)
626 return storageutil.iterrevs(len(self), start=start, stop=stop)
627
627
628 @property
628 @property
629 def nodemap(self):
629 def nodemap(self):
630 msg = (
630 msg = (
631 b"revlog.nodemap is deprecated, "
631 b"revlog.nodemap is deprecated, "
632 b"use revlog.index.[has_node|rev|get_rev]"
632 b"use revlog.index.[has_node|rev|get_rev]"
633 )
633 )
634 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
634 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
635 return self.index.nodemap
635 return self.index.nodemap
636
636
637 @property
637 @property
638 def _nodecache(self):
638 def _nodecache(self):
639 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
639 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
640 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
640 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
641 return self.index.nodemap
641 return self.index.nodemap
642
642
643 def hasnode(self, node):
643 def hasnode(self, node):
644 try:
644 try:
645 self.rev(node)
645 self.rev(node)
646 return True
646 return True
647 except KeyError:
647 except KeyError:
648 return False
648 return False
649
649
650 def candelta(self, baserev, rev):
650 def candelta(self, baserev, rev):
651 """whether two revisions (baserev, rev) can be delta-ed or not"""
651 """whether two revisions (baserev, rev) can be delta-ed or not"""
652 # Disable delta if either rev requires a content-changing flag
652 # Disable delta if either rev requires a content-changing flag
653 # processor (ex. LFS). This is because such flag processor can alter
653 # processor (ex. LFS). This is because such flag processor can alter
654 # the rawtext content that the delta will be based on, and two clients
654 # the rawtext content that the delta will be based on, and two clients
655 # could have a same revlog node with different flags (i.e. different
655 # could have a same revlog node with different flags (i.e. different
656 # rawtext contents) and the delta could be incompatible.
656 # rawtext contents) and the delta could be incompatible.
657 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
657 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
658 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
658 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
659 ):
659 ):
660 return False
660 return False
661 return True
661 return True
662
662
663 def update_caches(self, transaction):
663 def update_caches(self, transaction):
664 if self.nodemap_file is not None:
664 if self.nodemap_file is not None:
665 if transaction is None:
665 if transaction is None:
666 nodemaputil.update_persistent_nodemap(self)
666 nodemaputil.update_persistent_nodemap(self)
667 else:
667 else:
668 nodemaputil.setup_persistent_nodemap(transaction, self)
668 nodemaputil.setup_persistent_nodemap(transaction, self)
669
669
670 def clearcaches(self):
670 def clearcaches(self):
671 self._revisioncache = None
671 self._revisioncache = None
672 self._chainbasecache.clear()
672 self._chainbasecache.clear()
673 self._chunkcache = (0, b'')
673 self._chunkcache = (0, b'')
674 self._pcache = {}
674 self._pcache = {}
675 self._nodemap_docket = None
675 self._nodemap_docket = None
676 self.index.clearcaches()
676 self.index.clearcaches()
677 # The python code is the one responsible for validating the docket, we
677 # The python code is the one responsible for validating the docket, we
678 # end up having to refresh it here.
678 # end up having to refresh it here.
679 use_nodemap = (
679 use_nodemap = (
680 not self._inline
680 not self._inline
681 and self.nodemap_file is not None
681 and self.nodemap_file is not None
682 and util.safehasattr(self.index, 'update_nodemap_data')
682 and util.safehasattr(self.index, 'update_nodemap_data')
683 )
683 )
684 if use_nodemap:
684 if use_nodemap:
685 nodemap_data = nodemaputil.persisted_data(self)
685 nodemap_data = nodemaputil.persisted_data(self)
686 if nodemap_data is not None:
686 if nodemap_data is not None:
687 self._nodemap_docket = nodemap_data[0]
687 self._nodemap_docket = nodemap_data[0]
688 self.index.update_nodemap_data(*nodemap_data)
688 self.index.update_nodemap_data(*nodemap_data)
689
689
690 def rev(self, node):
690 def rev(self, node):
691 try:
691 try:
692 return self.index.rev(node)
692 return self.index.rev(node)
693 except TypeError:
693 except TypeError:
694 raise
694 raise
695 except error.RevlogError:
695 except error.RevlogError:
696 # parsers.c radix tree lookup failed
696 # parsers.c radix tree lookup failed
697 if (
697 if (
698 node == self.nodeconstants.wdirid
698 node == self.nodeconstants.wdirid
699 or node in self.nodeconstants.wdirfilenodeids
699 or node in self.nodeconstants.wdirfilenodeids
700 ):
700 ):
701 raise error.WdirUnsupported
701 raise error.WdirUnsupported
702 raise error.LookupError(node, self.indexfile, _(b'no node'))
702 raise error.LookupError(node, self.indexfile, _(b'no node'))
703
703
704 # Accessors for index entries.
704 # Accessors for index entries.
705
705
706 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
706 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
707 # are flags.
707 # are flags.
708 def start(self, rev):
708 def start(self, rev):
709 return int(self.index[rev][0] >> 16)
709 return int(self.index[rev][0] >> 16)
710
710
711 def flags(self, rev):
711 def flags(self, rev):
712 return self.index[rev][0] & 0xFFFF
712 return self.index[rev][0] & 0xFFFF
713
713
714 def length(self, rev):
714 def length(self, rev):
715 return self.index[rev][1]
715 return self.index[rev][1]
716
716
717 def sidedata_length(self, rev):
717 def sidedata_length(self, rev):
718 if self.version & 0xFFFF != REVLOGV2:
718 if self.version & 0xFFFF != REVLOGV2:
719 return 0
719 return 0
720 return self.index[rev][9]
720 return self.index[rev][9]
721
721
722 def rawsize(self, rev):
722 def rawsize(self, rev):
723 """return the length of the uncompressed text for a given revision"""
723 """return the length of the uncompressed text for a given revision"""
724 l = self.index[rev][2]
724 l = self.index[rev][2]
725 if l >= 0:
725 if l >= 0:
726 return l
726 return l
727
727
728 t = self.rawdata(rev)
728 t = self.rawdata(rev)
729 return len(t)
729 return len(t)
730
730
731 def size(self, rev):
731 def size(self, rev):
732 """length of non-raw text (processed by a "read" flag processor)"""
732 """length of non-raw text (processed by a "read" flag processor)"""
733 # fast path: if no "read" flag processor could change the content,
733 # fast path: if no "read" flag processor could change the content,
734 # size is rawsize. note: ELLIPSIS is known to not change the content.
734 # size is rawsize. note: ELLIPSIS is known to not change the content.
735 flags = self.flags(rev)
735 flags = self.flags(rev)
736 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
736 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
737 return self.rawsize(rev)
737 return self.rawsize(rev)
738
738
739 return len(self.revision(rev, raw=False))
739 return len(self.revision(rev, raw=False))
740
740
741 def chainbase(self, rev):
741 def chainbase(self, rev):
742 base = self._chainbasecache.get(rev)
742 base = self._chainbasecache.get(rev)
743 if base is not None:
743 if base is not None:
744 return base
744 return base
745
745
746 index = self.index
746 index = self.index
747 iterrev = rev
747 iterrev = rev
748 base = index[iterrev][3]
748 base = index[iterrev][3]
749 while base != iterrev:
749 while base != iterrev:
750 iterrev = base
750 iterrev = base
751 base = index[iterrev][3]
751 base = index[iterrev][3]
752
752
753 self._chainbasecache[rev] = base
753 self._chainbasecache[rev] = base
754 return base
754 return base
755
755
756 def linkrev(self, rev):
756 def linkrev(self, rev):
757 return self.index[rev][4]
757 return self.index[rev][4]
758
758
759 def parentrevs(self, rev):
759 def parentrevs(self, rev):
760 try:
760 try:
761 entry = self.index[rev]
761 entry = self.index[rev]
762 except IndexError:
762 except IndexError:
763 if rev == wdirrev:
763 if rev == wdirrev:
764 raise error.WdirUnsupported
764 raise error.WdirUnsupported
765 raise
765 raise
766 if entry[5] == nullrev:
766 if entry[5] == nullrev:
767 return entry[6], entry[5]
767 return entry[6], entry[5]
768 else:
768 else:
769 return entry[5], entry[6]
769 return entry[5], entry[6]
770
770
771 # fast parentrevs(rev) where rev isn't filtered
771 # fast parentrevs(rev) where rev isn't filtered
772 _uncheckedparentrevs = parentrevs
772 _uncheckedparentrevs = parentrevs
773
773
774 def node(self, rev):
774 def node(self, rev):
775 try:
775 try:
776 return self.index[rev][7]
776 return self.index[rev][7]
777 except IndexError:
777 except IndexError:
778 if rev == wdirrev:
778 if rev == wdirrev:
779 raise error.WdirUnsupported
779 raise error.WdirUnsupported
780 raise
780 raise
781
781
782 # Derived from index values.
782 # Derived from index values.
783
783
784 def end(self, rev):
784 def end(self, rev):
785 return self.start(rev) + self.length(rev)
785 return self.start(rev) + self.length(rev)
786
786
787 def parents(self, node):
787 def parents(self, node):
788 i = self.index
788 i = self.index
789 d = i[self.rev(node)]
789 d = i[self.rev(node)]
790 # inline node() to avoid function call overhead
790 # inline node() to avoid function call overhead
791 if d[5] == self.nullid:
791 if d[5] == self.nullid:
792 return i[d[6]][7], i[d[5]][7]
792 return i[d[6]][7], i[d[5]][7]
793 else:
793 else:
794 return i[d[5]][7], i[d[6]][7]
794 return i[d[5]][7], i[d[6]][7]
795
795
796 def chainlen(self, rev):
796 def chainlen(self, rev):
797 return self._chaininfo(rev)[0]
797 return self._chaininfo(rev)[0]
798
798
799 def _chaininfo(self, rev):
799 def _chaininfo(self, rev):
800 chaininfocache = self._chaininfocache
800 chaininfocache = self._chaininfocache
801 if rev in chaininfocache:
801 if rev in chaininfocache:
802 return chaininfocache[rev]
802 return chaininfocache[rev]
803 index = self.index
803 index = self.index
804 generaldelta = self._generaldelta
804 generaldelta = self._generaldelta
805 iterrev = rev
805 iterrev = rev
806 e = index[iterrev]
806 e = index[iterrev]
807 clen = 0
807 clen = 0
808 compresseddeltalen = 0
808 compresseddeltalen = 0
809 while iterrev != e[3]:
809 while iterrev != e[3]:
810 clen += 1
810 clen += 1
811 compresseddeltalen += e[1]
811 compresseddeltalen += e[1]
812 if generaldelta:
812 if generaldelta:
813 iterrev = e[3]
813 iterrev = e[3]
814 else:
814 else:
815 iterrev -= 1
815 iterrev -= 1
816 if iterrev in chaininfocache:
816 if iterrev in chaininfocache:
817 t = chaininfocache[iterrev]
817 t = chaininfocache[iterrev]
818 clen += t[0]
818 clen += t[0]
819 compresseddeltalen += t[1]
819 compresseddeltalen += t[1]
820 break
820 break
821 e = index[iterrev]
821 e = index[iterrev]
822 else:
822 else:
823 # Add text length of base since decompressing that also takes
823 # Add text length of base since decompressing that also takes
824 # work. For cache hits the length is already included.
824 # work. For cache hits the length is already included.
825 compresseddeltalen += e[1]
825 compresseddeltalen += e[1]
826 r = (clen, compresseddeltalen)
826 r = (clen, compresseddeltalen)
827 chaininfocache[rev] = r
827 chaininfocache[rev] = r
828 return r
828 return r
829
829
830 def _deltachain(self, rev, stoprev=None):
830 def _deltachain(self, rev, stoprev=None):
831 """Obtain the delta chain for a revision.
831 """Obtain the delta chain for a revision.
832
832
833 ``stoprev`` specifies a revision to stop at. If not specified, we
833 ``stoprev`` specifies a revision to stop at. If not specified, we
834 stop at the base of the chain.
834 stop at the base of the chain.
835
835
836 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
836 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
837 revs in ascending order and ``stopped`` is a bool indicating whether
837 revs in ascending order and ``stopped`` is a bool indicating whether
838 ``stoprev`` was hit.
838 ``stoprev`` was hit.
839 """
839 """
840 # Try C implementation.
840 # Try C implementation.
841 try:
841 try:
842 return self.index.deltachain(rev, stoprev, self._generaldelta)
842 return self.index.deltachain(rev, stoprev, self._generaldelta)
843 except AttributeError:
843 except AttributeError:
844 pass
844 pass
845
845
846 chain = []
846 chain = []
847
847
848 # Alias to prevent attribute lookup in tight loop.
848 # Alias to prevent attribute lookup in tight loop.
849 index = self.index
849 index = self.index
850 generaldelta = self._generaldelta
850 generaldelta = self._generaldelta
851
851
852 iterrev = rev
852 iterrev = rev
853 e = index[iterrev]
853 e = index[iterrev]
854 while iterrev != e[3] and iterrev != stoprev:
854 while iterrev != e[3] and iterrev != stoprev:
855 chain.append(iterrev)
855 chain.append(iterrev)
856 if generaldelta:
856 if generaldelta:
857 iterrev = e[3]
857 iterrev = e[3]
858 else:
858 else:
859 iterrev -= 1
859 iterrev -= 1
860 e = index[iterrev]
860 e = index[iterrev]
861
861
862 if iterrev == stoprev:
862 if iterrev == stoprev:
863 stopped = True
863 stopped = True
864 else:
864 else:
865 chain.append(iterrev)
865 chain.append(iterrev)
866 stopped = False
866 stopped = False
867
867
868 chain.reverse()
868 chain.reverse()
869 return chain, stopped
869 return chain, stopped
870
870
871 def ancestors(self, revs, stoprev=0, inclusive=False):
871 def ancestors(self, revs, stoprev=0, inclusive=False):
872 """Generate the ancestors of 'revs' in reverse revision order.
872 """Generate the ancestors of 'revs' in reverse revision order.
873 Does not generate revs lower than stoprev.
873 Does not generate revs lower than stoprev.
874
874
875 See the documentation for ancestor.lazyancestors for more details."""
875 See the documentation for ancestor.lazyancestors for more details."""
876
876
877 # first, make sure start revisions aren't filtered
877 # first, make sure start revisions aren't filtered
878 revs = list(revs)
878 revs = list(revs)
879 checkrev = self.node
879 checkrev = self.node
880 for r in revs:
880 for r in revs:
881 checkrev(r)
881 checkrev(r)
882 # and we're sure ancestors aren't filtered as well
882 # and we're sure ancestors aren't filtered as well
883
883
884 if rustancestor is not None:
884 if rustancestor is not None:
885 lazyancestors = rustancestor.LazyAncestors
885 lazyancestors = rustancestor.LazyAncestors
886 arg = self.index
886 arg = self.index
887 else:
887 else:
888 lazyancestors = ancestor.lazyancestors
888 lazyancestors = ancestor.lazyancestors
889 arg = self._uncheckedparentrevs
889 arg = self._uncheckedparentrevs
890 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
890 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
891
891
892 def descendants(self, revs):
892 def descendants(self, revs):
893 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
893 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
894
894
895 def findcommonmissing(self, common=None, heads=None):
895 def findcommonmissing(self, common=None, heads=None):
896 """Return a tuple of the ancestors of common and the ancestors of heads
896 """Return a tuple of the ancestors of common and the ancestors of heads
897 that are not ancestors of common. In revset terminology, we return the
897 that are not ancestors of common. In revset terminology, we return the
898 tuple:
898 tuple:
899
899
900 ::common, (::heads) - (::common)
900 ::common, (::heads) - (::common)
901
901
902 The list is sorted by revision number, meaning it is
902 The list is sorted by revision number, meaning it is
903 topologically sorted.
903 topologically sorted.
904
904
905 'heads' and 'common' are both lists of node IDs. If heads is
905 'heads' and 'common' are both lists of node IDs. If heads is
906 not supplied, uses all of the revlog's heads. If common is not
906 not supplied, uses all of the revlog's heads. If common is not
907 supplied, uses nullid."""
907 supplied, uses nullid."""
908 if common is None:
908 if common is None:
909 common = [self.nullid]
909 common = [self.nullid]
910 if heads is None:
910 if heads is None:
911 heads = self.heads()
911 heads = self.heads()
912
912
913 common = [self.rev(n) for n in common]
913 common = [self.rev(n) for n in common]
914 heads = [self.rev(n) for n in heads]
914 heads = [self.rev(n) for n in heads]
915
915
916 # we want the ancestors, but inclusive
916 # we want the ancestors, but inclusive
917 class lazyset(object):
917 class lazyset(object):
918 def __init__(self, lazyvalues):
918 def __init__(self, lazyvalues):
919 self.addedvalues = set()
919 self.addedvalues = set()
920 self.lazyvalues = lazyvalues
920 self.lazyvalues = lazyvalues
921
921
922 def __contains__(self, value):
922 def __contains__(self, value):
923 return value in self.addedvalues or value in self.lazyvalues
923 return value in self.addedvalues or value in self.lazyvalues
924
924
925 def __iter__(self):
925 def __iter__(self):
926 added = self.addedvalues
926 added = self.addedvalues
927 for r in added:
927 for r in added:
928 yield r
928 yield r
929 for r in self.lazyvalues:
929 for r in self.lazyvalues:
930 if not r in added:
930 if not r in added:
931 yield r
931 yield r
932
932
933 def add(self, value):
933 def add(self, value):
934 self.addedvalues.add(value)
934 self.addedvalues.add(value)
935
935
936 def update(self, values):
936 def update(self, values):
937 self.addedvalues.update(values)
937 self.addedvalues.update(values)
938
938
939 has = lazyset(self.ancestors(common))
939 has = lazyset(self.ancestors(common))
940 has.add(nullrev)
940 has.add(nullrev)
941 has.update(common)
941 has.update(common)
942
942
943 # take all ancestors from heads that aren't in has
943 # take all ancestors from heads that aren't in has
944 missing = set()
944 missing = set()
945 visit = collections.deque(r for r in heads if r not in has)
945 visit = collections.deque(r for r in heads if r not in has)
946 while visit:
946 while visit:
947 r = visit.popleft()
947 r = visit.popleft()
948 if r in missing:
948 if r in missing:
949 continue
949 continue
950 else:
950 else:
951 missing.add(r)
951 missing.add(r)
952 for p in self.parentrevs(r):
952 for p in self.parentrevs(r):
953 if p not in has:
953 if p not in has:
954 visit.append(p)
954 visit.append(p)
955 missing = list(missing)
955 missing = list(missing)
956 missing.sort()
956 missing.sort()
957 return has, [self.node(miss) for miss in missing]
957 return has, [self.node(miss) for miss in missing]
958
958
959 def incrementalmissingrevs(self, common=None):
959 def incrementalmissingrevs(self, common=None):
960 """Return an object that can be used to incrementally compute the
960 """Return an object that can be used to incrementally compute the
961 revision numbers of the ancestors of arbitrary sets that are not
961 revision numbers of the ancestors of arbitrary sets that are not
962 ancestors of common. This is an ancestor.incrementalmissingancestors
962 ancestors of common. This is an ancestor.incrementalmissingancestors
963 object.
963 object.
964
964
965 'common' is a list of revision numbers. If common is not supplied, uses
965 'common' is a list of revision numbers. If common is not supplied, uses
966 nullrev.
966 nullrev.
967 """
967 """
968 if common is None:
968 if common is None:
969 common = [nullrev]
969 common = [nullrev]
970
970
971 if rustancestor is not None:
971 if rustancestor is not None:
972 return rustancestor.MissingAncestors(self.index, common)
972 return rustancestor.MissingAncestors(self.index, common)
973 return ancestor.incrementalmissingancestors(self.parentrevs, common)
973 return ancestor.incrementalmissingancestors(self.parentrevs, common)
974
974
975 def findmissingrevs(self, common=None, heads=None):
975 def findmissingrevs(self, common=None, heads=None):
976 """Return the revision numbers of the ancestors of heads that
976 """Return the revision numbers of the ancestors of heads that
977 are not ancestors of common.
977 are not ancestors of common.
978
978
979 More specifically, return a list of revision numbers corresponding to
979 More specifically, return a list of revision numbers corresponding to
980 nodes N such that every N satisfies the following constraints:
980 nodes N such that every N satisfies the following constraints:
981
981
982 1. N is an ancestor of some node in 'heads'
982 1. N is an ancestor of some node in 'heads'
983 2. N is not an ancestor of any node in 'common'
983 2. N is not an ancestor of any node in 'common'
984
984
985 The list is sorted by revision number, meaning it is
985 The list is sorted by revision number, meaning it is
986 topologically sorted.
986 topologically sorted.
987
987
988 'heads' and 'common' are both lists of revision numbers. If heads is
988 'heads' and 'common' are both lists of revision numbers. If heads is
989 not supplied, uses all of the revlog's heads. If common is not
989 not supplied, uses all of the revlog's heads. If common is not
990 supplied, uses nullid."""
990 supplied, uses nullid."""
991 if common is None:
991 if common is None:
992 common = [nullrev]
992 common = [nullrev]
993 if heads is None:
993 if heads is None:
994 heads = self.headrevs()
994 heads = self.headrevs()
995
995
996 inc = self.incrementalmissingrevs(common=common)
996 inc = self.incrementalmissingrevs(common=common)
997 return inc.missingancestors(heads)
997 return inc.missingancestors(heads)
998
998
999 def findmissing(self, common=None, heads=None):
999 def findmissing(self, common=None, heads=None):
1000 """Return the ancestors of heads that are not ancestors of common.
1000 """Return the ancestors of heads that are not ancestors of common.
1001
1001
1002 More specifically, return a list of nodes N such that every N
1002 More specifically, return a list of nodes N such that every N
1003 satisfies the following constraints:
1003 satisfies the following constraints:
1004
1004
1005 1. N is an ancestor of some node in 'heads'
1005 1. N is an ancestor of some node in 'heads'
1006 2. N is not an ancestor of any node in 'common'
1006 2. N is not an ancestor of any node in 'common'
1007
1007
1008 The list is sorted by revision number, meaning it is
1008 The list is sorted by revision number, meaning it is
1009 topologically sorted.
1009 topologically sorted.
1010
1010
1011 'heads' and 'common' are both lists of node IDs. If heads is
1011 'heads' and 'common' are both lists of node IDs. If heads is
1012 not supplied, uses all of the revlog's heads. If common is not
1012 not supplied, uses all of the revlog's heads. If common is not
1013 supplied, uses nullid."""
1013 supplied, uses nullid."""
1014 if common is None:
1014 if common is None:
1015 common = [self.nullid]
1015 common = [self.nullid]
1016 if heads is None:
1016 if heads is None:
1017 heads = self.heads()
1017 heads = self.heads()
1018
1018
1019 common = [self.rev(n) for n in common]
1019 common = [self.rev(n) for n in common]
1020 heads = [self.rev(n) for n in heads]
1020 heads = [self.rev(n) for n in heads]
1021
1021
1022 inc = self.incrementalmissingrevs(common=common)
1022 inc = self.incrementalmissingrevs(common=common)
1023 return [self.node(r) for r in inc.missingancestors(heads)]
1023 return [self.node(r) for r in inc.missingancestors(heads)]
1024
1024
1025 def nodesbetween(self, roots=None, heads=None):
1025 def nodesbetween(self, roots=None, heads=None):
1026 """Return a topological path from 'roots' to 'heads'.
1026 """Return a topological path from 'roots' to 'heads'.
1027
1027
1028 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1028 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1029 topologically sorted list of all nodes N that satisfy both of
1029 topologically sorted list of all nodes N that satisfy both of
1030 these constraints:
1030 these constraints:
1031
1031
1032 1. N is a descendant of some node in 'roots'
1032 1. N is a descendant of some node in 'roots'
1033 2. N is an ancestor of some node in 'heads'
1033 2. N is an ancestor of some node in 'heads'
1034
1034
1035 Every node is considered to be both a descendant and an ancestor
1035 Every node is considered to be both a descendant and an ancestor
1036 of itself, so every reachable node in 'roots' and 'heads' will be
1036 of itself, so every reachable node in 'roots' and 'heads' will be
1037 included in 'nodes'.
1037 included in 'nodes'.
1038
1038
1039 'outroots' is the list of reachable nodes in 'roots', i.e., the
1039 'outroots' is the list of reachable nodes in 'roots', i.e., the
1040 subset of 'roots' that is returned in 'nodes'. Likewise,
1040 subset of 'roots' that is returned in 'nodes'. Likewise,
1041 'outheads' is the subset of 'heads' that is also in 'nodes'.
1041 'outheads' is the subset of 'heads' that is also in 'nodes'.
1042
1042
1043 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1043 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1044 unspecified, uses nullid as the only root. If 'heads' is
1044 unspecified, uses nullid as the only root. If 'heads' is
1045 unspecified, uses list of all of the revlog's heads."""
1045 unspecified, uses list of all of the revlog's heads."""
1046 nonodes = ([], [], [])
1046 nonodes = ([], [], [])
1047 if roots is not None:
1047 if roots is not None:
1048 roots = list(roots)
1048 roots = list(roots)
1049 if not roots:
1049 if not roots:
1050 return nonodes
1050 return nonodes
1051 lowestrev = min([self.rev(n) for n in roots])
1051 lowestrev = min([self.rev(n) for n in roots])
1052 else:
1052 else:
1053 roots = [self.nullid] # Everybody's a descendant of nullid
1053 roots = [self.nullid] # Everybody's a descendant of nullid
1054 lowestrev = nullrev
1054 lowestrev = nullrev
1055 if (lowestrev == nullrev) and (heads is None):
1055 if (lowestrev == nullrev) and (heads is None):
1056 # We want _all_ the nodes!
1056 # We want _all_ the nodes!
1057 return (
1057 return (
1058 [self.node(r) for r in self],
1058 [self.node(r) for r in self],
1059 [self.nullid],
1059 [self.nullid],
1060 list(self.heads()),
1060 list(self.heads()),
1061 )
1061 )
1062 if heads is None:
1062 if heads is None:
1063 # All nodes are ancestors, so the latest ancestor is the last
1063 # All nodes are ancestors, so the latest ancestor is the last
1064 # node.
1064 # node.
1065 highestrev = len(self) - 1
1065 highestrev = len(self) - 1
1066 # Set ancestors to None to signal that every node is an ancestor.
1066 # Set ancestors to None to signal that every node is an ancestor.
1067 ancestors = None
1067 ancestors = None
1068 # Set heads to an empty dictionary for later discovery of heads
1068 # Set heads to an empty dictionary for later discovery of heads
1069 heads = {}
1069 heads = {}
1070 else:
1070 else:
1071 heads = list(heads)
1071 heads = list(heads)
1072 if not heads:
1072 if not heads:
1073 return nonodes
1073 return nonodes
1074 ancestors = set()
1074 ancestors = set()
1075 # Turn heads into a dictionary so we can remove 'fake' heads.
1075 # Turn heads into a dictionary so we can remove 'fake' heads.
1076 # Also, later we will be using it to filter out the heads we can't
1076 # Also, later we will be using it to filter out the heads we can't
1077 # find from roots.
1077 # find from roots.
1078 heads = dict.fromkeys(heads, False)
1078 heads = dict.fromkeys(heads, False)
1079 # Start at the top and keep marking parents until we're done.
1079 # Start at the top and keep marking parents until we're done.
1080 nodestotag = set(heads)
1080 nodestotag = set(heads)
1081 # Remember where the top was so we can use it as a limit later.
1081 # Remember where the top was so we can use it as a limit later.
1082 highestrev = max([self.rev(n) for n in nodestotag])
1082 highestrev = max([self.rev(n) for n in nodestotag])
1083 while nodestotag:
1083 while nodestotag:
1084 # grab a node to tag
1084 # grab a node to tag
1085 n = nodestotag.pop()
1085 n = nodestotag.pop()
1086 # Never tag nullid
1086 # Never tag nullid
1087 if n == self.nullid:
1087 if n == self.nullid:
1088 continue
1088 continue
1089 # A node's revision number represents its place in a
1089 # A node's revision number represents its place in a
1090 # topologically sorted list of nodes.
1090 # topologically sorted list of nodes.
1091 r = self.rev(n)
1091 r = self.rev(n)
1092 if r >= lowestrev:
1092 if r >= lowestrev:
1093 if n not in ancestors:
1093 if n not in ancestors:
1094 # If we are possibly a descendant of one of the roots
1094 # If we are possibly a descendant of one of the roots
1095 # and we haven't already been marked as an ancestor
1095 # and we haven't already been marked as an ancestor
1096 ancestors.add(n) # Mark as ancestor
1096 ancestors.add(n) # Mark as ancestor
1097 # Add non-nullid parents to list of nodes to tag.
1097 # Add non-nullid parents to list of nodes to tag.
1098 nodestotag.update(
1098 nodestotag.update(
1099 [p for p in self.parents(n) if p != self.nullid]
1099 [p for p in self.parents(n) if p != self.nullid]
1100 )
1100 )
1101 elif n in heads: # We've seen it before, is it a fake head?
1101 elif n in heads: # We've seen it before, is it a fake head?
1102 # So it is, real heads should not be the ancestors of
1102 # So it is, real heads should not be the ancestors of
1103 # any other heads.
1103 # any other heads.
1104 heads.pop(n)
1104 heads.pop(n)
1105 if not ancestors:
1105 if not ancestors:
1106 return nonodes
1106 return nonodes
1107 # Now that we have our set of ancestors, we want to remove any
1107 # Now that we have our set of ancestors, we want to remove any
1108 # roots that are not ancestors.
1108 # roots that are not ancestors.
1109
1109
1110 # If one of the roots was nullid, everything is included anyway.
1110 # If one of the roots was nullid, everything is included anyway.
1111 if lowestrev > nullrev:
1111 if lowestrev > nullrev:
1112 # But, since we weren't, let's recompute the lowest rev to not
1112 # But, since we weren't, let's recompute the lowest rev to not
1113 # include roots that aren't ancestors.
1113 # include roots that aren't ancestors.
1114
1114
1115 # Filter out roots that aren't ancestors of heads
1115 # Filter out roots that aren't ancestors of heads
1116 roots = [root for root in roots if root in ancestors]
1116 roots = [root for root in roots if root in ancestors]
1117 # Recompute the lowest revision
1117 # Recompute the lowest revision
1118 if roots:
1118 if roots:
1119 lowestrev = min([self.rev(root) for root in roots])
1119 lowestrev = min([self.rev(root) for root in roots])
1120 else:
1120 else:
1121 # No more roots? Return empty list
1121 # No more roots? Return empty list
1122 return nonodes
1122 return nonodes
1123 else:
1123 else:
1124 # We are descending from nullid, and don't need to care about
1124 # We are descending from nullid, and don't need to care about
1125 # any other roots.
1125 # any other roots.
1126 lowestrev = nullrev
1126 lowestrev = nullrev
1127 roots = [self.nullid]
1127 roots = [self.nullid]
1128 # Transform our roots list into a set.
1128 # Transform our roots list into a set.
1129 descendants = set(roots)
1129 descendants = set(roots)
1130 # Also, keep the original roots so we can filter out roots that aren't
1130 # Also, keep the original roots so we can filter out roots that aren't
1131 # 'real' roots (i.e. are descended from other roots).
1131 # 'real' roots (i.e. are descended from other roots).
1132 roots = descendants.copy()
1132 roots = descendants.copy()
1133 # Our topologically sorted list of output nodes.
1133 # Our topologically sorted list of output nodes.
1134 orderedout = []
1134 orderedout = []
1135 # Don't start at nullid since we don't want nullid in our output list,
1135 # Don't start at nullid since we don't want nullid in our output list,
1136 # and if nullid shows up in descendants, empty parents will look like
1136 # and if nullid shows up in descendants, empty parents will look like
1137 # they're descendants.
1137 # they're descendants.
1138 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1138 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1139 n = self.node(r)
1139 n = self.node(r)
1140 isdescendant = False
1140 isdescendant = False
1141 if lowestrev == nullrev: # Everybody is a descendant of nullid
1141 if lowestrev == nullrev: # Everybody is a descendant of nullid
1142 isdescendant = True
1142 isdescendant = True
1143 elif n in descendants:
1143 elif n in descendants:
1144 # n is already a descendant
1144 # n is already a descendant
1145 isdescendant = True
1145 isdescendant = True
1146 # This check only needs to be done here because all the roots
1146 # This check only needs to be done here because all the roots
1147 # will start being marked is descendants before the loop.
1147 # will start being marked is descendants before the loop.
1148 if n in roots:
1148 if n in roots:
1149 # If n was a root, check if it's a 'real' root.
1149 # If n was a root, check if it's a 'real' root.
1150 p = tuple(self.parents(n))
1150 p = tuple(self.parents(n))
1151 # If any of its parents are descendants, it's not a root.
1151 # If any of its parents are descendants, it's not a root.
1152 if (p[0] in descendants) or (p[1] in descendants):
1152 if (p[0] in descendants) or (p[1] in descendants):
1153 roots.remove(n)
1153 roots.remove(n)
1154 else:
1154 else:
1155 p = tuple(self.parents(n))
1155 p = tuple(self.parents(n))
1156 # A node is a descendant if either of its parents are
1156 # A node is a descendant if either of its parents are
1157 # descendants. (We seeded the dependents list with the roots
1157 # descendants. (We seeded the dependents list with the roots
1158 # up there, remember?)
1158 # up there, remember?)
1159 if (p[0] in descendants) or (p[1] in descendants):
1159 if (p[0] in descendants) or (p[1] in descendants):
1160 descendants.add(n)
1160 descendants.add(n)
1161 isdescendant = True
1161 isdescendant = True
1162 if isdescendant and ((ancestors is None) or (n in ancestors)):
1162 if isdescendant and ((ancestors is None) or (n in ancestors)):
1163 # Only include nodes that are both descendants and ancestors.
1163 # Only include nodes that are both descendants and ancestors.
1164 orderedout.append(n)
1164 orderedout.append(n)
1165 if (ancestors is not None) and (n in heads):
1165 if (ancestors is not None) and (n in heads):
1166 # We're trying to figure out which heads are reachable
1166 # We're trying to figure out which heads are reachable
1167 # from roots.
1167 # from roots.
1168 # Mark this head as having been reached
1168 # Mark this head as having been reached
1169 heads[n] = True
1169 heads[n] = True
1170 elif ancestors is None:
1170 elif ancestors is None:
1171 # Otherwise, we're trying to discover the heads.
1171 # Otherwise, we're trying to discover the heads.
1172 # Assume this is a head because if it isn't, the next step
1172 # Assume this is a head because if it isn't, the next step
1173 # will eventually remove it.
1173 # will eventually remove it.
1174 heads[n] = True
1174 heads[n] = True
1175 # But, obviously its parents aren't.
1175 # But, obviously its parents aren't.
1176 for p in self.parents(n):
1176 for p in self.parents(n):
1177 heads.pop(p, None)
1177 heads.pop(p, None)
1178 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1178 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1179 roots = list(roots)
1179 roots = list(roots)
1180 assert orderedout
1180 assert orderedout
1181 assert roots
1181 assert roots
1182 assert heads
1182 assert heads
1183 return (orderedout, roots, heads)
1183 return (orderedout, roots, heads)
1184
1184
1185 def headrevs(self, revs=None):
1185 def headrevs(self, revs=None):
1186 if revs is None:
1186 if revs is None:
1187 try:
1187 try:
1188 return self.index.headrevs()
1188 return self.index.headrevs()
1189 except AttributeError:
1189 except AttributeError:
1190 return self._headrevs()
1190 return self._headrevs()
1191 if rustdagop is not None:
1191 if rustdagop is not None:
1192 return rustdagop.headrevs(self.index, revs)
1192 return rustdagop.headrevs(self.index, revs)
1193 return dagop.headrevs(revs, self._uncheckedparentrevs)
1193 return dagop.headrevs(revs, self._uncheckedparentrevs)
1194
1194
1195 def computephases(self, roots):
1195 def computephases(self, roots):
1196 return self.index.computephasesmapsets(roots)
1196 return self.index.computephasesmapsets(roots)
1197
1197
1198 def _headrevs(self):
1198 def _headrevs(self):
1199 count = len(self)
1199 count = len(self)
1200 if not count:
1200 if not count:
1201 return [nullrev]
1201 return [nullrev]
1202 # we won't iter over filtered rev so nobody is a head at start
1202 # we won't iter over filtered rev so nobody is a head at start
1203 ishead = [0] * (count + 1)
1203 ishead = [0] * (count + 1)
1204 index = self.index
1204 index = self.index
1205 for r in self:
1205 for r in self:
1206 ishead[r] = 1 # I may be an head
1206 ishead[r] = 1 # I may be an head
1207 e = index[r]
1207 e = index[r]
1208 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1208 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1209 return [r for r, val in enumerate(ishead) if val]
1209 return [r for r, val in enumerate(ishead) if val]
1210
1210
1211 def heads(self, start=None, stop=None):
1211 def heads(self, start=None, stop=None):
1212 """return the list of all nodes that have no children
1212 """return the list of all nodes that have no children
1213
1213
1214 if start is specified, only heads that are descendants of
1214 if start is specified, only heads that are descendants of
1215 start will be returned
1215 start will be returned
1216 if stop is specified, it will consider all the revs from stop
1216 if stop is specified, it will consider all the revs from stop
1217 as if they had no children
1217 as if they had no children
1218 """
1218 """
1219 if start is None and stop is None:
1219 if start is None and stop is None:
1220 if not len(self):
1220 if not len(self):
1221 return [self.nullid]
1221 return [self.nullid]
1222 return [self.node(r) for r in self.headrevs()]
1222 return [self.node(r) for r in self.headrevs()]
1223
1223
1224 if start is None:
1224 if start is None:
1225 start = nullrev
1225 start = nullrev
1226 else:
1226 else:
1227 start = self.rev(start)
1227 start = self.rev(start)
1228
1228
1229 stoprevs = {self.rev(n) for n in stop or []}
1229 stoprevs = {self.rev(n) for n in stop or []}
1230
1230
1231 revs = dagop.headrevssubset(
1231 revs = dagop.headrevssubset(
1232 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1232 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1233 )
1233 )
1234
1234
1235 return [self.node(rev) for rev in revs]
1235 return [self.node(rev) for rev in revs]
1236
1236
1237 def children(self, node):
1237 def children(self, node):
1238 """find the children of a given node"""
1238 """find the children of a given node"""
1239 c = []
1239 c = []
1240 p = self.rev(node)
1240 p = self.rev(node)
1241 for r in self.revs(start=p + 1):
1241 for r in self.revs(start=p + 1):
1242 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1242 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1243 if prevs:
1243 if prevs:
1244 for pr in prevs:
1244 for pr in prevs:
1245 if pr == p:
1245 if pr == p:
1246 c.append(self.node(r))
1246 c.append(self.node(r))
1247 elif p == nullrev:
1247 elif p == nullrev:
1248 c.append(self.node(r))
1248 c.append(self.node(r))
1249 return c
1249 return c
1250
1250
1251 def commonancestorsheads(self, a, b):
1251 def commonancestorsheads(self, a, b):
1252 """calculate all the heads of the common ancestors of nodes a and b"""
1252 """calculate all the heads of the common ancestors of nodes a and b"""
1253 a, b = self.rev(a), self.rev(b)
1253 a, b = self.rev(a), self.rev(b)
1254 ancs = self._commonancestorsheads(a, b)
1254 ancs = self._commonancestorsheads(a, b)
1255 return pycompat.maplist(self.node, ancs)
1255 return pycompat.maplist(self.node, ancs)
1256
1256
1257 def _commonancestorsheads(self, *revs):
1257 def _commonancestorsheads(self, *revs):
1258 """calculate all the heads of the common ancestors of revs"""
1258 """calculate all the heads of the common ancestors of revs"""
1259 try:
1259 try:
1260 ancs = self.index.commonancestorsheads(*revs)
1260 ancs = self.index.commonancestorsheads(*revs)
1261 except (AttributeError, OverflowError): # C implementation failed
1261 except (AttributeError, OverflowError): # C implementation failed
1262 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1262 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1263 return ancs
1263 return ancs
1264
1264
1265 def isancestor(self, a, b):
1265 def isancestor(self, a, b):
1266 """return True if node a is an ancestor of node b
1266 """return True if node a is an ancestor of node b
1267
1267
1268 A revision is considered an ancestor of itself."""
1268 A revision is considered an ancestor of itself."""
1269 a, b = self.rev(a), self.rev(b)
1269 a, b = self.rev(a), self.rev(b)
1270 return self.isancestorrev(a, b)
1270 return self.isancestorrev(a, b)
1271
1271
1272 def isancestorrev(self, a, b):
1272 def isancestorrev(self, a, b):
1273 """return True if revision a is an ancestor of revision b
1273 """return True if revision a is an ancestor of revision b
1274
1274
1275 A revision is considered an ancestor of itself.
1275 A revision is considered an ancestor of itself.
1276
1276
1277 The implementation of this is trivial but the use of
1277 The implementation of this is trivial but the use of
1278 reachableroots is not."""
1278 reachableroots is not."""
1279 if a == nullrev:
1279 if a == nullrev:
1280 return True
1280 return True
1281 elif a == b:
1281 elif a == b:
1282 return True
1282 return True
1283 elif a > b:
1283 elif a > b:
1284 return False
1284 return False
1285 return bool(self.reachableroots(a, [b], [a], includepath=False))
1285 return bool(self.reachableroots(a, [b], [a], includepath=False))
1286
1286
1287 def reachableroots(self, minroot, heads, roots, includepath=False):
1287 def reachableroots(self, minroot, heads, roots, includepath=False):
1288 """return (heads(::(<roots> and <roots>::<heads>)))
1288 """return (heads(::(<roots> and <roots>::<heads>)))
1289
1289
1290 If includepath is True, return (<roots>::<heads>)."""
1290 If includepath is True, return (<roots>::<heads>)."""
1291 try:
1291 try:
1292 return self.index.reachableroots2(
1292 return self.index.reachableroots2(
1293 minroot, heads, roots, includepath
1293 minroot, heads, roots, includepath
1294 )
1294 )
1295 except AttributeError:
1295 except AttributeError:
1296 return dagop._reachablerootspure(
1296 return dagop._reachablerootspure(
1297 self.parentrevs, minroot, roots, heads, includepath
1297 self.parentrevs, minroot, roots, heads, includepath
1298 )
1298 )
1299
1299
1300 def ancestor(self, a, b):
1300 def ancestor(self, a, b):
1301 """calculate the "best" common ancestor of nodes a and b"""
1301 """calculate the "best" common ancestor of nodes a and b"""
1302
1302
1303 a, b = self.rev(a), self.rev(b)
1303 a, b = self.rev(a), self.rev(b)
1304 try:
1304 try:
1305 ancs = self.index.ancestors(a, b)
1305 ancs = self.index.ancestors(a, b)
1306 except (AttributeError, OverflowError):
1306 except (AttributeError, OverflowError):
1307 ancs = ancestor.ancestors(self.parentrevs, a, b)
1307 ancs = ancestor.ancestors(self.parentrevs, a, b)
1308 if ancs:
1308 if ancs:
1309 # choose a consistent winner when there's a tie
1309 # choose a consistent winner when there's a tie
1310 return min(map(self.node, ancs))
1310 return min(map(self.node, ancs))
1311 return self.nullid
1311 return self.nullid
1312
1312
1313 def _match(self, id):
1313 def _match(self, id):
1314 if isinstance(id, int):
1314 if isinstance(id, int):
1315 # rev
1315 # rev
1316 return self.node(id)
1316 return self.node(id)
1317 if len(id) == self.nodeconstants.nodelen:
1317 if len(id) == self.nodeconstants.nodelen:
1318 # possibly a binary node
1318 # possibly a binary node
1319 # odds of a binary node being all hex in ASCII are 1 in 10**25
1319 # odds of a binary node being all hex in ASCII are 1 in 10**25
1320 try:
1320 try:
1321 node = id
1321 node = id
1322 self.rev(node) # quick search the index
1322 self.rev(node) # quick search the index
1323 return node
1323 return node
1324 except error.LookupError:
1324 except error.LookupError:
1325 pass # may be partial hex id
1325 pass # may be partial hex id
1326 try:
1326 try:
1327 # str(rev)
1327 # str(rev)
1328 rev = int(id)
1328 rev = int(id)
1329 if b"%d" % rev != id:
1329 if b"%d" % rev != id:
1330 raise ValueError
1330 raise ValueError
1331 if rev < 0:
1331 if rev < 0:
1332 rev = len(self) + rev
1332 rev = len(self) + rev
1333 if rev < 0 or rev >= len(self):
1333 if rev < 0 or rev >= len(self):
1334 raise ValueError
1334 raise ValueError
1335 return self.node(rev)
1335 return self.node(rev)
1336 except (ValueError, OverflowError):
1336 except (ValueError, OverflowError):
1337 pass
1337 pass
1338 if len(id) == 2 * self.nodeconstants.nodelen:
1338 if len(id) == 2 * self.nodeconstants.nodelen:
1339 try:
1339 try:
1340 # a full hex nodeid?
1340 # a full hex nodeid?
1341 node = bin(id)
1341 node = bin(id)
1342 self.rev(node)
1342 self.rev(node)
1343 return node
1343 return node
1344 except (TypeError, error.LookupError):
1344 except (TypeError, error.LookupError):
1345 pass
1345 pass
1346
1346
1347 def _partialmatch(self, id):
1347 def _partialmatch(self, id):
1348 # we don't care wdirfilenodeids as they should be always full hash
1348 # we don't care wdirfilenodeids as they should be always full hash
1349 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1349 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1350 try:
1350 try:
1351 partial = self.index.partialmatch(id)
1351 partial = self.index.partialmatch(id)
1352 if partial and self.hasnode(partial):
1352 if partial and self.hasnode(partial):
1353 if maybewdir:
1353 if maybewdir:
1354 # single 'ff...' match in radix tree, ambiguous with wdir
1354 # single 'ff...' match in radix tree, ambiguous with wdir
1355 raise error.RevlogError
1355 raise error.RevlogError
1356 return partial
1356 return partial
1357 if maybewdir:
1357 if maybewdir:
1358 # no 'ff...' match in radix tree, wdir identified
1358 # no 'ff...' match in radix tree, wdir identified
1359 raise error.WdirUnsupported
1359 raise error.WdirUnsupported
1360 return None
1360 return None
1361 except error.RevlogError:
1361 except error.RevlogError:
1362 # parsers.c radix tree lookup gave multiple matches
1362 # parsers.c radix tree lookup gave multiple matches
1363 # fast path: for unfiltered changelog, radix tree is accurate
1363 # fast path: for unfiltered changelog, radix tree is accurate
1364 if not getattr(self, 'filteredrevs', None):
1364 if not getattr(self, 'filteredrevs', None):
1365 raise error.AmbiguousPrefixLookupError(
1365 raise error.AmbiguousPrefixLookupError(
1366 id, self.indexfile, _(b'ambiguous identifier')
1366 id, self.indexfile, _(b'ambiguous identifier')
1367 )
1367 )
1368 # fall through to slow path that filters hidden revisions
1368 # fall through to slow path that filters hidden revisions
1369 except (AttributeError, ValueError):
1369 except (AttributeError, ValueError):
1370 # we are pure python, or key was too short to search radix tree
1370 # we are pure python, or key was too short to search radix tree
1371 pass
1371 pass
1372
1372
1373 if id in self._pcache:
1373 if id in self._pcache:
1374 return self._pcache[id]
1374 return self._pcache[id]
1375
1375
1376 if len(id) <= 40:
1376 if len(id) <= 40:
1377 try:
1377 try:
1378 # hex(node)[:...]
1378 # hex(node)[:...]
1379 l = len(id) // 2 # grab an even number of digits
1379 l = len(id) // 2 # grab an even number of digits
1380 prefix = bin(id[: l * 2])
1380 prefix = bin(id[: l * 2])
1381 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1381 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1382 nl = [
1382 nl = [
1383 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1383 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1384 ]
1384 ]
1385 if self.nodeconstants.nullhex.startswith(id):
1385 if self.nodeconstants.nullhex.startswith(id):
1386 nl.append(self.nullid)
1386 nl.append(self.nullid)
1387 if len(nl) > 0:
1387 if len(nl) > 0:
1388 if len(nl) == 1 and not maybewdir:
1388 if len(nl) == 1 and not maybewdir:
1389 self._pcache[id] = nl[0]
1389 self._pcache[id] = nl[0]
1390 return nl[0]
1390 return nl[0]
1391 raise error.AmbiguousPrefixLookupError(
1391 raise error.AmbiguousPrefixLookupError(
1392 id, self.indexfile, _(b'ambiguous identifier')
1392 id, self.indexfile, _(b'ambiguous identifier')
1393 )
1393 )
1394 if maybewdir:
1394 if maybewdir:
1395 raise error.WdirUnsupported
1395 raise error.WdirUnsupported
1396 return None
1396 return None
1397 except TypeError:
1397 except TypeError:
1398 pass
1398 pass
1399
1399
1400 def lookup(self, id):
1400 def lookup(self, id):
1401 """locate a node based on:
1401 """locate a node based on:
1402 - revision number or str(revision number)
1402 - revision number or str(revision number)
1403 - nodeid or subset of hex nodeid
1403 - nodeid or subset of hex nodeid
1404 """
1404 """
1405 n = self._match(id)
1405 n = self._match(id)
1406 if n is not None:
1406 if n is not None:
1407 return n
1407 return n
1408 n = self._partialmatch(id)
1408 n = self._partialmatch(id)
1409 if n:
1409 if n:
1410 return n
1410 return n
1411
1411
1412 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1412 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1413
1413
1414 def shortest(self, node, minlength=1):
1414 def shortest(self, node, minlength=1):
1415 """Find the shortest unambiguous prefix that matches node."""
1415 """Find the shortest unambiguous prefix that matches node."""
1416
1416
1417 def isvalid(prefix):
1417 def isvalid(prefix):
1418 try:
1418 try:
1419 matchednode = self._partialmatch(prefix)
1419 matchednode = self._partialmatch(prefix)
1420 except error.AmbiguousPrefixLookupError:
1420 except error.AmbiguousPrefixLookupError:
1421 return False
1421 return False
1422 except error.WdirUnsupported:
1422 except error.WdirUnsupported:
1423 # single 'ff...' match
1423 # single 'ff...' match
1424 return True
1424 return True
1425 if matchednode is None:
1425 if matchednode is None:
1426 raise error.LookupError(node, self.indexfile, _(b'no node'))
1426 raise error.LookupError(node, self.indexfile, _(b'no node'))
1427 return True
1427 return True
1428
1428
1429 def maybewdir(prefix):
1429 def maybewdir(prefix):
1430 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1430 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1431
1431
1432 hexnode = hex(node)
1432 hexnode = hex(node)
1433
1433
1434 def disambiguate(hexnode, minlength):
1434 def disambiguate(hexnode, minlength):
1435 """Disambiguate against wdirid."""
1435 """Disambiguate against wdirid."""
1436 for length in range(minlength, len(hexnode) + 1):
1436 for length in range(minlength, len(hexnode) + 1):
1437 prefix = hexnode[:length]
1437 prefix = hexnode[:length]
1438 if not maybewdir(prefix):
1438 if not maybewdir(prefix):
1439 return prefix
1439 return prefix
1440
1440
1441 if not getattr(self, 'filteredrevs', None):
1441 if not getattr(self, 'filteredrevs', None):
1442 try:
1442 try:
1443 length = max(self.index.shortest(node), minlength)
1443 length = max(self.index.shortest(node), minlength)
1444 return disambiguate(hexnode, length)
1444 return disambiguate(hexnode, length)
1445 except error.RevlogError:
1445 except error.RevlogError:
1446 if node != self.nodeconstants.wdirid:
1446 if node != self.nodeconstants.wdirid:
1447 raise error.LookupError(node, self.indexfile, _(b'no node'))
1447 raise error.LookupError(node, self.indexfile, _(b'no node'))
1448 except AttributeError:
1448 except AttributeError:
1449 # Fall through to pure code
1449 # Fall through to pure code
1450 pass
1450 pass
1451
1451
1452 if node == self.nodeconstants.wdirid:
1452 if node == self.nodeconstants.wdirid:
1453 for length in range(minlength, len(hexnode) + 1):
1453 for length in range(minlength, len(hexnode) + 1):
1454 prefix = hexnode[:length]
1454 prefix = hexnode[:length]
1455 if isvalid(prefix):
1455 if isvalid(prefix):
1456 return prefix
1456 return prefix
1457
1457
1458 for length in range(minlength, len(hexnode) + 1):
1458 for length in range(minlength, len(hexnode) + 1):
1459 prefix = hexnode[:length]
1459 prefix = hexnode[:length]
1460 if isvalid(prefix):
1460 if isvalid(prefix):
1461 return disambiguate(hexnode, length)
1461 return disambiguate(hexnode, length)
1462
1462
1463 def cmp(self, node, text):
1463 def cmp(self, node, text):
1464 """compare text with a given file revision
1464 """compare text with a given file revision
1465
1465
1466 returns True if text is different than what is stored.
1466 returns True if text is different than what is stored.
1467 """
1467 """
1468 p1, p2 = self.parents(node)
1468 p1, p2 = self.parents(node)
1469 return storageutil.hashrevisionsha1(text, p1, p2) != node
1469 return storageutil.hashrevisionsha1(text, p1, p2) != node
1470
1470
1471 def _cachesegment(self, offset, data):
1471 def _cachesegment(self, offset, data):
1472 """Add a segment to the revlog cache.
1472 """Add a segment to the revlog cache.
1473
1473
1474 Accepts an absolute offset and the data that is at that location.
1474 Accepts an absolute offset and the data that is at that location.
1475 """
1475 """
1476 o, d = self._chunkcache
1476 o, d = self._chunkcache
1477 # try to add to existing cache
1477 # try to add to existing cache
1478 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1478 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1479 self._chunkcache = o, d + data
1479 self._chunkcache = o, d + data
1480 else:
1480 else:
1481 self._chunkcache = offset, data
1481 self._chunkcache = offset, data
1482
1482
1483 def _readsegment(self, offset, length, df=None):
1483 def _readsegment(self, offset, length, df=None):
1484 """Load a segment of raw data from the revlog.
1484 """Load a segment of raw data from the revlog.
1485
1485
1486 Accepts an absolute offset, length to read, and an optional existing
1486 Accepts an absolute offset, length to read, and an optional existing
1487 file handle to read from.
1487 file handle to read from.
1488
1488
1489 If an existing file handle is passed, it will be seeked and the
1489 If an existing file handle is passed, it will be seeked and the
1490 original seek position will NOT be restored.
1490 original seek position will NOT be restored.
1491
1491
1492 Returns a str or buffer of raw byte data.
1492 Returns a str or buffer of raw byte data.
1493
1493
1494 Raises if the requested number of bytes could not be read.
1494 Raises if the requested number of bytes could not be read.
1495 """
1495 """
1496 # Cache data both forward and backward around the requested
1496 # Cache data both forward and backward around the requested
1497 # data, in a fixed size window. This helps speed up operations
1497 # data, in a fixed size window. This helps speed up operations
1498 # involving reading the revlog backwards.
1498 # involving reading the revlog backwards.
1499 cachesize = self._chunkcachesize
1499 cachesize = self._chunkcachesize
1500 realoffset = offset & ~(cachesize - 1)
1500 realoffset = offset & ~(cachesize - 1)
1501 reallength = (
1501 reallength = (
1502 (offset + length + cachesize) & ~(cachesize - 1)
1502 (offset + length + cachesize) & ~(cachesize - 1)
1503 ) - realoffset
1503 ) - realoffset
1504 with self._datareadfp(df) as df:
1504 with self._datareadfp(df) as df:
1505 df.seek(realoffset)
1505 df.seek(realoffset)
1506 d = df.read(reallength)
1506 d = df.read(reallength)
1507
1507
1508 self._cachesegment(realoffset, d)
1508 self._cachesegment(realoffset, d)
1509 if offset != realoffset or reallength != length:
1509 if offset != realoffset or reallength != length:
1510 startoffset = offset - realoffset
1510 startoffset = offset - realoffset
1511 if len(d) - startoffset < length:
1511 if len(d) - startoffset < length:
1512 raise error.RevlogError(
1512 raise error.RevlogError(
1513 _(
1513 _(
1514 b'partial read of revlog %s; expected %d bytes from '
1514 b'partial read of revlog %s; expected %d bytes from '
1515 b'offset %d, got %d'
1515 b'offset %d, got %d'
1516 )
1516 )
1517 % (
1517 % (
1518 self.indexfile if self._inline else self.datafile,
1518 self.indexfile if self._inline else self.datafile,
1519 length,
1519 length,
1520 realoffset,
1520 realoffset,
1521 len(d) - startoffset,
1521 len(d) - startoffset,
1522 )
1522 )
1523 )
1523 )
1524
1524
1525 return util.buffer(d, startoffset, length)
1525 return util.buffer(d, startoffset, length)
1526
1526
1527 if len(d) < length:
1527 if len(d) < length:
1528 raise error.RevlogError(
1528 raise error.RevlogError(
1529 _(
1529 _(
1530 b'partial read of revlog %s; expected %d bytes from offset '
1530 b'partial read of revlog %s; expected %d bytes from offset '
1531 b'%d, got %d'
1531 b'%d, got %d'
1532 )
1532 )
1533 % (
1533 % (
1534 self.indexfile if self._inline else self.datafile,
1534 self.indexfile if self._inline else self.datafile,
1535 length,
1535 length,
1536 offset,
1536 offset,
1537 len(d),
1537 len(d),
1538 )
1538 )
1539 )
1539 )
1540
1540
1541 return d
1541 return d
1542
1542
1543 def _getsegment(self, offset, length, df=None):
1543 def _getsegment(self, offset, length, df=None):
1544 """Obtain a segment of raw data from the revlog.
1544 """Obtain a segment of raw data from the revlog.
1545
1545
1546 Accepts an absolute offset, length of bytes to obtain, and an
1546 Accepts an absolute offset, length of bytes to obtain, and an
1547 optional file handle to the already-opened revlog. If the file
1547 optional file handle to the already-opened revlog. If the file
1548 handle is used, it's original seek position will not be preserved.
1548 handle is used, it's original seek position will not be preserved.
1549
1549
1550 Requests for data may be returned from a cache.
1550 Requests for data may be returned from a cache.
1551
1551
1552 Returns a str or a buffer instance of raw byte data.
1552 Returns a str or a buffer instance of raw byte data.
1553 """
1553 """
1554 o, d = self._chunkcache
1554 o, d = self._chunkcache
1555 l = len(d)
1555 l = len(d)
1556
1556
1557 # is it in the cache?
1557 # is it in the cache?
1558 cachestart = offset - o
1558 cachestart = offset - o
1559 cacheend = cachestart + length
1559 cacheend = cachestart + length
1560 if cachestart >= 0 and cacheend <= l:
1560 if cachestart >= 0 and cacheend <= l:
1561 if cachestart == 0 and cacheend == l:
1561 if cachestart == 0 and cacheend == l:
1562 return d # avoid a copy
1562 return d # avoid a copy
1563 return util.buffer(d, cachestart, cacheend - cachestart)
1563 return util.buffer(d, cachestart, cacheend - cachestart)
1564
1564
1565 return self._readsegment(offset, length, df=df)
1565 return self._readsegment(offset, length, df=df)
1566
1566
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        # entry[0] packs the byte offset in its upper bits; the low 16 bits
        # are dropped here (presumably per-revision flags — matches the
        # ``>> 16`` used elsewhere in this class).
        start = int(istart[0] >> 16)
        if startrev == endrev:
            # single revision: its length (entry[1]) bounds the segment
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            # inline revlogs interleave index entries with data, so skip
            # over the (rev + 1) index entries preceding each data chunk
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)
1600
1600
1601 def _chunk(self, rev, df=None):
1601 def _chunk(self, rev, df=None):
1602 """Obtain a single decompressed chunk for a revision.
1602 """Obtain a single decompressed chunk for a revision.
1603
1603
1604 Accepts an integer revision and an optional already-open file handle
1604 Accepts an integer revision and an optional already-open file handle
1605 to be used for reading. If used, the seek position of the file will not
1605 to be used for reading. If used, the seek position of the file will not
1606 be preserved.
1606 be preserved.
1607
1607
1608 Returns a str holding uncompressed data for the requested revision.
1608 Returns a str holding uncompressed data for the requested revision.
1609 """
1609 """
1610 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1610 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1611
1611
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # hoist attribute lookups out of the hot loop below
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            # one contiguous read covering all requested revisions
            slicedchunks = (revs,)
        else:
            # split the revision run into densely-packed slices to avoid
            # reading large unused gaps (targetsize caps each read)
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    # inline revlogs interleave index entries with data
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                # chunkstart is absolute; rebase it onto the read segment
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1666
1666
    def _chunkclear(self):
        """Clear the raw chunk cache.

        Resets the cache to an empty buffer anchored at offset 0, the same
        shape ``_getsegment`` expects to unpack.
        """
        self._chunkcache = (0, b'')
1670
1670
1671 def deltaparent(self, rev):
1671 def deltaparent(self, rev):
1672 """return deltaparent of the given revision"""
1672 """return deltaparent of the given revision"""
1673 base = self.index[rev][3]
1673 base = self.index[rev][3]
1674 if base == rev:
1674 if base == rev:
1675 return nullrev
1675 return nullrev
1676 elif self._generaldelta:
1676 elif self._generaldelta:
1677 return base
1677 return base
1678 else:
1678 else:
1679 return rev - 1
1679 return rev - 1
1680
1680
    def issnapshot(self, rev):
        """tells whether rev is a snapshot

        A snapshot is a revision whose stored text is not a delta against
        one of its parents (it is either a full text or an intermediate
        snapshot in a sparse-revlog chain).
        """
        if not self._sparserevlog:
            # without sparse-revlog, only full texts (delta against null)
            # are snapshots
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            # NOTE: this rebinds ``self.issnapshot`` to the C index's
            # implementation, so subsequent calls skip this Python dispatch
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        # pure-Python fallback below
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            # delta base is itself: full text
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            # delta against a parent: ordinary delta, not a snapshot
            return False
        # delta against a non-parent: snapshot iff its base is one
        return self.issnapshot(base)
1702
1702
1703 def snapshotdepth(self, rev):
1703 def snapshotdepth(self, rev):
1704 """number of snapshot in the chain before this one"""
1704 """number of snapshot in the chain before this one"""
1705 if not self.issnapshot(rev):
1705 if not self.issnapshot(rev):
1706 raise error.ProgrammingError(b'revision %d not a snapshot')
1706 raise error.ProgrammingError(b'revision %d not a snapshot')
1707 return len(self._deltachain(rev)[0]) - 1
1707 return len(self._deltachain(rev)[0]) - 1
1708
1708
1709 def revdiff(self, rev1, rev2):
1709 def revdiff(self, rev1, rev2):
1710 """return or calculate a delta between two revisions
1710 """return or calculate a delta between two revisions
1711
1711
1712 The delta calculated is in binary form and is intended to be written to
1712 The delta calculated is in binary form and is intended to be written to
1713 revlog data directly. So this function needs raw revision data.
1713 revlog data directly. So this function needs raw revision data.
1714 """
1714 """
1715 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1715 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1716 return bytes(self._chunk(rev2))
1716 return bytes(self._chunk(rev2))
1717
1717
1718 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1718 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1719
1719
1720 def _processflags(self, text, flags, operation, raw=False):
1720 def _processflags(self, text, flags, operation, raw=False):
1721 """deprecated entry point to access flag processors"""
1721 """deprecated entry point to access flag processors"""
1722 msg = b'_processflag(...) use the specialized variant'
1722 msg = b'_processflag(...) use the specialized variant'
1723 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1723 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1724 if raw:
1724 if raw:
1725 return text, flagutil.processflagsraw(self, text, flags)
1725 return text, flagutil.processflagsraw(self, text, flags)
1726 elif operation == b'read':
1726 elif operation == b'read':
1727 return flagutil.processflagsread(self, text, flags)
1727 return flagutil.processflagsread(self, text, flags)
1728 else: # write operation
1728 else: # write operation
1729 return flagutil.processflagswrite(self, text, flags)
1729 return flagutil.processflagswrite(self, text, flags)
1730
1730
1731 def revision(self, nodeorrev, _df=None, raw=False):
1731 def revision(self, nodeorrev, _df=None, raw=False):
1732 """return an uncompressed revision of a given node or revision
1732 """return an uncompressed revision of a given node or revision
1733 number.
1733 number.
1734
1734
1735 _df - an existing file handle to read from. (internal-only)
1735 _df - an existing file handle to read from. (internal-only)
1736 raw - an optional argument specifying if the revision data is to be
1736 raw - an optional argument specifying if the revision data is to be
1737 treated as raw data when applying flag transforms. 'raw' should be set
1737 treated as raw data when applying flag transforms. 'raw' should be set
1738 to True when generating changegroups or in debug commands.
1738 to True when generating changegroups or in debug commands.
1739 """
1739 """
1740 if raw:
1740 if raw:
1741 msg = (
1741 msg = (
1742 b'revlog.revision(..., raw=True) is deprecated, '
1742 b'revlog.revision(..., raw=True) is deprecated, '
1743 b'use revlog.rawdata(...)'
1743 b'use revlog.rawdata(...)'
1744 )
1744 )
1745 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1745 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1746 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1746 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1747
1747
1748 def sidedata(self, nodeorrev, _df=None):
1748 def sidedata(self, nodeorrev, _df=None):
1749 """a map of extra data related to the changeset but not part of the hash
1749 """a map of extra data related to the changeset but not part of the hash
1750
1750
1751 This function currently return a dictionary. However, more advanced
1751 This function currently return a dictionary. However, more advanced
1752 mapping object will likely be used in the future for a more
1752 mapping object will likely be used in the future for a more
1753 efficient/lazy code.
1753 efficient/lazy code.
1754 """
1754 """
1755 return self._revisiondata(nodeorrev, _df)[1]
1755 return self._revisiondata(nodeorrev, _df)[1]
1756
1756
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        """Return a ``(text, sidedata)`` pair for a node or revision number.

        ``nodeorrev`` may be an integer revision or a binary node.
        ``_df`` is an optional already-open file handle (internal-only).
        When ``raw`` is True, the stored rawtext is returned without running
        the read-side flag processors (this backs ``rawdata()``).
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.version & 0xFFFF == REVLOGV2:
            # only revlogv2 stores sidedata on disk
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            # raw mode: only verify, never transform, the stored text
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            # remember the verified rawtext for subsequent lookups
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata
1807
1807
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # cache hit: the cached rawtext was already hash-checked
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        # walk the delta chain; stop early if it reaches the cached revision
        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            # the chain ends at the cached revision, whose text becomes the
            # base the remaining deltas are applied onto
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            # heuristic read-size cap handed to the chunk slicer
            # (presumably 4x the uncompressed size — see _chunks/targetsize)
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            # no cached base: the first chunk of the chain is the full text
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext # let us have a chance to free memory early
        return (rev, rawtext, False)
1850
1850
1851 def _sidedata(self, rev):
1851 def _sidedata(self, rev):
1852 """Return the sidedata for a given revision number."""
1852 """Return the sidedata for a given revision number."""
1853 index_entry = self.index[rev]
1853 index_entry = self.index[rev]
1854 sidedata_offset = index_entry[8]
1854 sidedata_offset = index_entry[8]
1855 sidedata_size = index_entry[9]
1855 sidedata_size = index_entry[9]
1856
1856
1857 if self._inline:
1857 if self._inline:
1858 sidedata_offset += self.index.entry_size * (1 + rev)
1858 sidedata_offset += self.index.entry_size * (1 + rev)
1859 if sidedata_size == 0:
1859 if sidedata_size == 0:
1860 return {}
1860 return {}
1861
1861
1862 segment = self._getsegment(sidedata_offset, sidedata_size)
1862 segment = self._getsegment(sidedata_offset, sidedata_size)
1863 sidedata = sidedatautil.deserialize_sidedata(segment)
1863 sidedata = sidedatautil.deserialize_sidedata(segment)
1864 return sidedata
1864 return sidedata
1865
1865
1866 def rawdata(self, nodeorrev, _df=None):
1866 def rawdata(self, nodeorrev, _df=None):
1867 """return an uncompressed raw data of a given node or revision number.
1867 """return an uncompressed raw data of a given node or revision number.
1868
1868
1869 _df - an existing file handle to read from. (internal-only)
1869 _df - an existing file handle to read from. (internal-only)
1870 """
1870 """
1871 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1871 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1872
1872
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.

        Delegates to ``storageutil.hashrevisionsha1`` (a SHA-1 over the
        parents and text, per its name — see that helper for specifics).
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
1880
1880
1881 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1881 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1882 """Check node hash integrity.
1882 """Check node hash integrity.
1883
1883
1884 Available as a function so that subclasses can extend hash mismatch
1884 Available as a function so that subclasses can extend hash mismatch
1885 behaviors as needed.
1885 behaviors as needed.
1886 """
1886 """
1887 try:
1887 try:
1888 if p1 is None and p2 is None:
1888 if p1 is None and p2 is None:
1889 p1, p2 = self.parents(node)
1889 p1, p2 = self.parents(node)
1890 if node != self.hash(text, p1, p2):
1890 if node != self.hash(text, p1, p2):
1891 # Clear the revision cache on hash failure. The revision cache
1891 # Clear the revision cache on hash failure. The revision cache
1892 # only stores the raw revision and clearing the cache does have
1892 # only stores the raw revision and clearing the cache does have
1893 # the side-effect that we won't have a cache hit when the raw
1893 # the side-effect that we won't have a cache hit when the raw
1894 # revision data is accessed. But this case should be rare and
1894 # revision data is accessed. But this case should be rare and
1895 # it is extra work to teach the cache about the hash
1895 # it is extra work to teach the cache about the hash
1896 # verification state.
1896 # verification state.
1897 if self._revisioncache and self._revisioncache[0] == node:
1897 if self._revisioncache and self._revisioncache[0] == node:
1898 self._revisioncache = None
1898 self._revisioncache = None
1899
1899
1900 revornode = rev
1900 revornode = rev
1901 if revornode is None:
1901 if revornode is None:
1902 revornode = templatefilters.short(hex(node))
1902 revornode = templatefilters.short(hex(node))
1903 raise error.RevlogError(
1903 raise error.RevlogError(
1904 _(b"integrity check failed on %s:%s")
1904 _(b"integrity check failed on %s:%s")
1905 % (self.indexfile, pycompat.bytestr(revornode))
1905 % (self.indexfile, pycompat.bytestr(revornode))
1906 )
1906 )
1907 except error.RevlogError:
1907 except error.RevlogError:
1908 if self._censorable and storageutil.iscensoredtext(text):
1908 if self._censorable and storageutil.iscensoredtext(text):
1909 raise error.CensoredNodeError(self.indexfile, node, text)
1909 raise error.CensoredNodeError(self.indexfile, node, text)
1910 raise
1910 raise
1911
1911
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        # nothing to do unless inline and past the size threshold
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        # copy every revision's data chunk out of the inline index file
        # into the new standalone data file
        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    # track the first revision at/after the transaction
                    # offset for the tr.replace() truncation point below
                    trindex = r

        # rewrite the index without the inline flag or interleaved data
        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self.index.pack_header(self.version)
                    e = header + e
                fp.write(e)

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()
1963
1963
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.

        Intentionally a no-op here; presumably an overridable hook for
        subclasses — confirm against callers before relying on it.
        """
1966
1966
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        sidedata - optional mapping of extra data; only allowed when the
            revlog supports sidedata (raises ProgrammingError otherwise)

        Returns the revision number of the (possibly pre-existing) node.
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog who don't support them")
            )

        if flags:
            # compute the node on the original text *before* the flag
            # processors may transform it into the stored rawtext
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            # the node is already stored: return its existing revision
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
2044
2044
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            # inline revlogs keep data inside the index file, so a separate
            # data-file handle is only needed for non-inline revlogs
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            # always release the handles, even if _addrevision raised
            if dfh:
                dfh.close()
            ifh.close()
2085
2085
2086 def compress(self, data):
2086 def compress(self, data):
2087 """Generate a possibly-compressed representation of data."""
2087 """Generate a possibly-compressed representation of data."""
2088 if not data:
2088 if not data:
2089 return b'', data
2089 return b'', data
2090
2090
2091 compressed = self._compressor.compress(data)
2091 compressed = self._compressor.compress(data)
2092
2092
2093 if compressed:
2093 if compressed:
2094 # The revlog compressor added the header in the returned data.
2094 # The revlog compressor added the header in the returned data.
2095 return b'', compressed
2095 return b'', compressed
2096
2096
2097 if data[0:1] == b'\0':
2097 if data[0:1] == b'\0':
2098 return b'', data
2098 return b'', data
2099 return b'u', data
2099 return b'u', data
2100
2100
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.

        Recognized header bytes:
          - b'x': zlib-compressed; raises RevlogError on corrupt input
          - b'\\0': stored raw, returned as-is
          - b'u': uncompressed, returned with the header byte stripped
          - anything else: looked up among the registered compression
            engines; raises RevlogError when no engine claims the header
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely by at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        # Unknown header byte: fall back to the registered compression
        # engines, caching the resolved decompressor for next time.
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)
2160
2160
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.

        Returns the revision number of the newly appended entry.
        """
        # Reject sentinel nodes that must never be stored.
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        # Inline revlogs keep revision data in the index file itself, so the
        # delta computer must read from the index handle in that case.
        if self._inline:
            fh = ifh
        else:
            fh = dfh

        # One-element mutable container; the delta computer may fill it in
        # with the resolved fulltext (read back below as btext[0]).
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        # Sidedata is only serialized for revlog v2; the blob is appended
        # right after this revision's delta data.
        if sidedata and self.version & 0xFFFF == REVLOGV2:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        if self.version & 0xFFFF != REVLOGV2:
            # Pre-v2 index formats only store the first 8 fields (no
            # sidedata offset/length).
            e = e[:8]

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            # The very first entry also carries the revlog version header.
            header = self.index.pack_header(self.version)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr
2300
2300
2301 def _get_data_offset(self, prev):
2301 def _get_data_offset(self, prev):
2302 """Returns the current offset in the (in-transaction) data file.
2302 """Returns the current offset in the (in-transaction) data file.
2303 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2303 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2304 file to store that information: since sidedata can be rewritten to the
2304 file to store that information: since sidedata can be rewritten to the
2305 end of the data file within a transaction, you can have cases where, for
2305 end of the data file within a transaction, you can have cases where, for
2306 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2306 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2307 to `n - 1`'s sidedata being written after `n`'s data.
2307 to `n - 1`'s sidedata being written after `n`'s data.
2308
2308
2309 TODO cache this in a docket file before getting out of experimental."""
2309 TODO cache this in a docket file before getting out of experimental."""
2310 if self.version & 0xFFFF != REVLOGV2:
2310 if self.version & 0xFFFF != REVLOGV2:
2311 return self.end(prev)
2311 return self.end(prev)
2312
2312
2313 offset = 0
2313 offset = 0
2314 for rev, entry in enumerate(self.index):
2314 for rev, entry in enumerate(self.index):
2315 sidedata_end = entry[8] + entry[9]
2315 sidedata_end = entry[8] + entry[9]
2316 # Sidedata for a previous rev has potentially been written after
2316 # Sidedata for a previous rev has potentially been written after
2317 # this rev's end, so take the max.
2317 # this rev's end, so take the max.
2318 offset = max(self.end(rev), offset, sidedata_end)
2318 offset = max(self.end(rev), offset, sidedata_end)
2319 return offset
2319 return offset
2320
2320
    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        """Write one revision's index entry and data through ``ifh``/``dfh``.

        ``entry`` is the packed index record; ``data`` is a 2-item
        (header, payload) sequence of delta data as produced by compress();
        ``offset`` is the write position registered with the transaction;
        ``sidedata`` is the already-serialized sidedata blob (may be empty).
        ``link`` is currently unused here.
        """
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            # Separate data file: delta (and sidedata) go to .d, the index
            # record to .i.
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            # Inline revlog: index records and revision data are interleaved
            # in the index file itself.
            offset += curr * self.index.entry_size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            # May migrate the revlog out of inline mode once it grows too big.
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)
2360
2360
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.

        If ``duplicaterevisioncb`` is defined, it will be called with this
        revlog and the revision number of any node that was already present.

        Returns True if at least one delta was applied or matched an
        existing revision, False if the incoming group was empty.
        """

        # addgroup() owns the write handles for its whole duration; nesting
        # would corrupt the shared handle state.
        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            # Helper handed to _peek_iscensored below.
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()
        return not empty
2491
2491
2492 def iscensored(self, rev):
2492 def iscensored(self, rev):
2493 """Check if a file revision is censored."""
2493 """Check if a file revision is censored."""
2494 if not self._censorable:
2494 if not self._censorable:
2495 return False
2495 return False
2496
2496
2497 return self.flags(rev) & REVIDX_ISCENSORED
2497 return self.flags(rev) & REVIDX_ISCENSORED
2498
2498
2499 def _peek_iscensored(self, baserev, delta, flush):
2499 def _peek_iscensored(self, baserev, delta, flush):
2500 """Quickly check if a delta produces a censored revision."""
2500 """Quickly check if a delta produces a censored revision."""
2501 if not self._censorable:
2501 if not self._censorable:
2502 return False
2502 return False
2503
2503
2504 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2504 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2505
2505
2506 def getstrippoint(self, minlink):
2506 def getstrippoint(self, minlink):
2507 """find the minimum rev that must be stripped to strip the linkrev
2507 """find the minimum rev that must be stripped to strip the linkrev
2508
2508
2509 Returns a tuple containing the minimum rev and a set of all revs that
2509 Returns a tuple containing the minimum rev and a set of all revs that
2510 have linkrevs that will be broken by this strip.
2510 have linkrevs that will be broken by this strip.
2511 """
2511 """
2512 return storageutil.resolvestripinfo(
2512 return storageutil.resolvestripinfo(
2513 minlink,
2513 minlink,
2514 len(self) - 1,
2514 len(self) - 1,
2515 self.headrevs(),
2515 self.headrevs(),
2516 self.linkrev,
2516 self.linkrev,
2517 self.parentrevs,
2517 self.parentrevs,
2518 )
2518 )
2519
2519
2520 def strip(self, minlink, transaction):
2520 def strip(self, minlink, transaction):
2521 """truncate the revlog on the first revision with a linkrev >= minlink
2521 """truncate the revlog on the first revision with a linkrev >= minlink
2522
2522
2523 This function is called when we're stripping revision minlink and
2523 This function is called when we're stripping revision minlink and
2524 its descendants from the repository.
2524 its descendants from the repository.
2525
2525
2526 We have to remove all revisions with linkrev >= minlink, because
2526 We have to remove all revisions with linkrev >= minlink, because
2527 the equivalent changelog revisions will be renumbered after the
2527 the equivalent changelog revisions will be renumbered after the
2528 strip.
2528 strip.
2529
2529
2530 So we truncate the revlog on the first of these revisions, and
2530 So we truncate the revlog on the first of these revisions, and
2531 trust that the caller has saved the revisions that shouldn't be
2531 trust that the caller has saved the revisions that shouldn't be
2532 removed and that it'll re-add them after this truncation.
2532 removed and that it'll re-add them after this truncation.
2533 """
2533 """
2534 if len(self) == 0:
2534 if len(self) == 0:
2535 return
2535 return
2536
2536
2537 rev, _ = self.getstrippoint(minlink)
2537 rev, _ = self.getstrippoint(minlink)
2538 if rev == len(self):
2538 if rev == len(self):
2539 return
2539 return
2540
2540
2541 # first truncate the files on disk
2541 # first truncate the files on disk
2542 end = self.start(rev)
2542 end = self.start(rev)
2543 if not self._inline:
2543 if not self._inline:
2544 transaction.add(self.datafile, end)
2544 transaction.add(self.datafile, end)
2545 end = rev * self.index.entry_size
2545 end = rev * self.index.entry_size
2546 else:
2546 else:
2547 end += rev * self.index.entry_size
2547 end += rev * self.index.entry_size
2548
2548
2549 transaction.add(self.indexfile, end)
2549 transaction.add(self.indexfile, end)
2550
2550
2551 # then reset internal state in memory to forget those revisions
2551 # then reset internal state in memory to forget those revisions
2552 self._revisioncache = None
2552 self._revisioncache = None
2553 self._chaininfocache = util.lrucachedict(500)
2553 self._chaininfocache = util.lrucachedict(500)
2554 self._chunkclear()
2554 self._chunkclear()
2555
2555
2556 del self.index[rev:-1]
2556 del self.index[rev:-1]
2557
2557
2558 def checksize(self):
2558 def checksize(self):
2559 """Check size of index and data files
2559 """Check size of index and data files
2560
2560
2561 return a (dd, di) tuple.
2561 return a (dd, di) tuple.
2562 - dd: extra bytes for the "data" file
2562 - dd: extra bytes for the "data" file
2563 - di: extra bytes for the "index" file
2563 - di: extra bytes for the "index" file
2564
2564
2565 A healthy revlog will return (0, 0).
2565 A healthy revlog will return (0, 0).
2566 """
2566 """
2567 expected = 0
2567 expected = 0
2568 if len(self):
2568 if len(self):
2569 expected = max(0, self.end(len(self) - 1))
2569 expected = max(0, self.end(len(self) - 1))
2570
2570
2571 try:
2571 try:
2572 with self._datafp() as f:
2572 with self._datafp() as f:
2573 f.seek(0, io.SEEK_END)
2573 f.seek(0, io.SEEK_END)
2574 actual = f.tell()
2574 actual = f.tell()
2575 dd = actual - expected
2575 dd = actual - expected
2576 except IOError as inst:
2576 except IOError as inst:
2577 if inst.errno != errno.ENOENT:
2577 if inst.errno != errno.ENOENT:
2578 raise
2578 raise
2579 dd = 0
2579 dd = 0
2580
2580
2581 try:
2581 try:
2582 f = self.opener(self.indexfile)
2582 f = self.opener(self.indexfile)
2583 f.seek(0, io.SEEK_END)
2583 f.seek(0, io.SEEK_END)
2584 actual = f.tell()
2584 actual = f.tell()
2585 f.close()
2585 f.close()
2586 s = self.index.entry_size
2586 s = self.index.entry_size
2587 i = max(0, actual // s)
2587 i = max(0, actual // s)
2588 di = actual - (i * s)
2588 di = actual - (i * s)
2589 if self._inline:
2589 if self._inline:
2590 databytes = 0
2590 databytes = 0
2591 for r in self:
2591 for r in self:
2592 databytes += max(0, self.length(r))
2592 databytes += max(0, self.length(r))
2593 dd = 0
2593 dd = 0
2594 di = actual - len(self) * s - databytes
2594 di = actual - len(self) * s - databytes
2595 except IOError as inst:
2595 except IOError as inst:
2596 if inst.errno != errno.ENOENT:
2596 if inst.errno != errno.ENOENT:
2597 raise
2597 raise
2598 di = 0
2598 di = 0
2599
2599
2600 return (dd, di)
2600 return (dd, di)
2601
2601
def files(self):
    """Return the list of on-disk files backing this revlog.

    An inline revlog is fully contained in its index file; otherwise the
    separate data file is included as well.
    """
    if self._inline:
        return [self.indexfile]
    return [self.indexfile, self.datafile]
2607
2607
def emitrevisions(
    self,
    nodes,
    nodesorder=None,
    revisiondata=False,
    assumehaveparentrevisions=False,
    deltamode=repository.CG_DELTAMODE_STD,
    sidedata_helpers=None,
):
    """Produce ``revlogrevisiondelta`` objects for the requested nodes.

    Validates ``nodesorder``, normalizes the delta mode for revlogs that
    do not store delta chains, then delegates the heavy lifting to
    ``storageutil.emitrevisions``.
    """
    known_orders = (b'nodes', b'storage', b'linear', None)
    if nodesorder not in known_orders:
        raise error.ProgrammingError(
            b'unhandled value for nodesorder: %s' % nodesorder
        )

    # Without generaldelta, deltas are always against the previous
    # revision, so storage order is the only sensible default.
    if nodesorder is None and not self._generaldelta:
        nodesorder = b'storage'

    storedeltas = self._storedeltachains
    if not storedeltas and deltamode != repository.CG_DELTAMODE_PREV:
        deltamode = repository.CG_DELTAMODE_FULL

    return storageutil.emitrevisions(
        self,
        nodes,
        nodesorder,
        revlogrevisiondelta,
        deltaparentfn=self.deltaparent,
        candeltafn=self.candelta,
        rawsizefn=self.rawsize,
        revdifffn=self.revdiff,
        flagsfn=self.flags,
        deltamode=deltamode,
        revisiondata=revisiondata,
        assumehaveparentrevisions=assumehaveparentrevisions,
        sidedata_helpers=sidedata_helpers,
    )
2646
2646
# Delta-reuse policies accepted by `clone` (see its docstring for the
# precise semantics of each mode).
DELTAREUSEALWAYS = b'always'
DELTAREUSESAMEREVS = b'samerevs'
DELTAREUSENEVER = b'never'

# Re-add revisions as if they were new content (slowest, most flexible).
DELTAREUSEFULLADD = b'fulladd'

# Every valid value for the `deltareuse` argument of `clone`.
DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2654
2654
def clone(
    self,
    tr,
    destrevlog,
    addrevisioncb=None,
    deltareuse=DELTAREUSESAMEREVS,
    forcedeltabothparents=None,
    sidedata_helpers=None,
):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument control how deltas from the existing revlog
    are preserved in the destination revlog. The argument can have the
    following values:

    DELTAREUSEALWAYS
      Deltas will always be reused (if possible), even if the destination
      revlog would not select the same revisions for the delta. This is the
      fastest mode of operation.
    DELTAREUSESAMEREVS
      Deltas will be reused if the destination revlog would pick the same
      revisions for the delta. This mode strikes a balance between speed
      and optimization.
    DELTAREUSENEVER
      Deltas will never be reused. This is the slowest mode of execution.
      This mode can be used to recompute deltas (e.g. if the diff/delta
      algorithm changes).
    DELTAREUSEFULLADD
      Revision will be re-added as if their were new content. This is
      slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
      eg: large file detection and handling.

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``forcedeltabothparents``
    argument controls whether to force compute deltas against both parents
    for merges. By default, the current default is used.

    See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
    `sidedata_helpers`.

    Raises ``ValueError`` for an invalid ``deltareuse`` value, a non-empty
    destination, or filtered revisions on either side.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(
            _(b'value for deltareuse invalid: %s') % deltareuse
        )

    if len(destrevlog):
        raise ValueError(_(b'destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_(b'source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_(b'destination revlog has filtered revisions'))

    # lazydelta and lazydeltabase controls whether to reuse a cached delta,
    # if possible.
    # Saved here so the destination's configuration can be restored in the
    # ``finally`` block below, whatever happens during the copy.
    oldlazydelta = destrevlog._lazydelta
    oldlazydeltabase = destrevlog._lazydeltabase
    oldamd = destrevlog._deltabothparents

    try:
        # Translate the requested policy into the destination revlog's
        # lazy-delta knobs (DELTAREUSEFULLADD leaves them untouched).
        if deltareuse == self.DELTAREUSEALWAYS:
            destrevlog._lazydeltabase = True
            destrevlog._lazydelta = True
        elif deltareuse == self.DELTAREUSESAMEREVS:
            destrevlog._lazydeltabase = False
            destrevlog._lazydelta = True
        elif deltareuse == self.DELTAREUSENEVER:
            destrevlog._lazydeltabase = False
            destrevlog._lazydelta = False

        destrevlog._deltabothparents = forcedeltabothparents or oldamd

        self._clone(
            tr,
            destrevlog,
            addrevisioncb,
            deltareuse,
            forcedeltabothparents,
            sidedata_helpers,
        )

    finally:
        # Restore the destination revlog's original delta configuration.
        destrevlog._lazydelta = oldlazydelta
        destrevlog._lazydeltabase = oldlazydeltabase
        destrevlog._deltabothparents = oldamd
2753
2753
def _clone(
    self,
    tr,
    destrevlog,
    addrevisioncb,
    deltareuse,
    forcedeltabothparents,
    sidedata_helpers,
):
    """perform the core duty of `revlog.clone` after parameter processing

    Iterates every revision in storage order, optionally reusing the
    stored delta, re-running sidedata helpers, and appending the result to
    ``destrevlog``. ``addrevisioncb(self, rev, node)`` is invoked after
    each revision is written, when provided.
    """
    deltacomputer = deltautil.deltacomputer(destrevlog)
    index = self.index
    for rev in self:
        entry = index[rev]

        # Some classes override linkrev to take filtered revs into
        # account. Use raw entry from index.
        flags = entry[0] & 0xFFFF
        linkrev = entry[4]
        p1 = index[entry[5]][7]
        p2 = index[entry[6]][7]
        node = entry[7]

        # (Possibly) reuse the delta from the revlog if allowed and
        # the revlog chunk is a delta.
        cachedelta = None
        rawtext = None
        if deltareuse == self.DELTAREUSEFULLADD:
            # Full re-add path: go through the high-level addrevision so
            # every mechanism (e.g. large-file handling) can kick in.
            text, sidedata = self._revisiondata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                flags = flags | new_flags[0] & ~new_flags[1]

            destrevlog.addrevision(
                text,
                tr,
                linkrev,
                p1,
                p2,
                cachedelta=cachedelta,
                node=node,
                flags=flags,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        else:
            if destrevlog._lazydelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            sidedata = None
            if not cachedelta:
                # When no delta is reused, the full raw text is needed.
                rawtext, sidedata = self._revisiondata(rev)
            if sidedata is None:
                sidedata = self.sidedata(rev)

            if sidedata_helpers is not None:
                (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                    self, sidedata_helpers, sidedata, rev
                )
                flags = flags | new_flags[0] & ~new_flags[1]

            ifh = destrevlog.opener(
                destrevlog.indexfile, b'a+', checkambig=False
            )
            dfh = None
            if not destrevlog._inline:
                dfh = destrevlog.opener(destrevlog.datafile, b'a+')
            try:
                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    ifh,
                    dfh,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            finally:
                # Always release file handles, even if the write fails.
                if dfh:
                    dfh.close()
                ifh.close()

        if addrevisioncb:
            addrevisioncb(self, rev, node)
2848
2848
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the content of ``censornode`` with ``tombstone``.

    Because revlogs are append-only, censoring is implemented by copying
    every revision into a fresh temporary revlog (substituting the
    tombstone for the censored revision) and swapping the files in at
    transaction close. Raises ``error.Abort`` when censoring is not
    possible (tombstone too long, delta chains through the censored
    revision, ...).
    """
    if (self.version & 0xFFFF) == REVLOGV0:
        raise error.RevlogError(
            _(b'cannot censor with version %d revlogs') % self.version
        )

    censorrev = self.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # The tombstone must fit in the slot of the original data so the
    # index's recorded sizes stay consistent.
    if len(tombstone) > self.rawsize(censorrev):
        raise error.Abort(
            _(b'censor tombstone must be no longer than censored data')
        )

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.

    newindexfile = self.indexfile + b'.tmpcensored'
    newdatafile = self.datafile + b'.tmpcensored'

    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog(
        self.opener,
        target=self.target,
        indexfile=newindexfile,
        datafile=newdatafile,
        censorable=True,
    )
    newrl.version = self.version
    newrl._generaldelta = self._generaldelta
    newrl._parse_index = self._parse_index

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev == censorrev:
            newrl.addrawrevision(
                tombstone,
                tr,
                self.linkrev(censorrev),
                p1,
                p2,
                censornode,
                REVIDX_ISCENSORED,
            )

            # The tombstone must be stored as a full snapshot: a delta
            # base would leak (part of) the censored content.
            if newrl.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(
                        b'censored revision stored as delta; '
                        b'cannot censor'
                    ),
                    hint=_(
                        b'censoring of revlogs is not '
                        b'fully implemented; please report '
                        b'this bug'
                    ),
                )
            continue

        if self.iscensored(rev):
            if self.deltaparent(rev) != nullrev:
                raise error.Abort(
                    _(
                        b'cannot censor due to censored '
                        b'revision having delta stored'
                    )
                )
            # Copy the stored tombstone verbatim; rawdata() would fail to
            # verify a censored revision.
            rawtext = self._chunk(rev)
        else:
            rawtext = self.rawdata(rev)

        newrl.addrawrevision(
            rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
        )

    # Register backups so the transaction can roll the swap back.
    tr.addbackup(self.indexfile, location=b'store')
    if not self._inline:
        tr.addbackup(self.datafile, location=b'store')

    self.opener.rename(newrl.indexfile, self.indexfile)
    if not self._inline:
        self.opener.rename(newrl.datafile, self.datafile)

    # Drop every cache derived from the replaced files and re-read.
    self.clearcaches()
    self._loadindex()
2937
2937
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.

    ``state`` is the mutable verifier state dict; this method reads
    ``expectedversion``, ``skipflags`` and ``erroroncensored`` and
    populates ``skipread`` / ``safe_renamed``.
    """
    dd, di = self.checksize()
    if dd:
        yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
    if di:
        yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

    version = self.version & 0xFFFF

    # The verifier tells us what version revlog we should be.
    if version != state[b'expectedversion']:
        yield revlogproblem(
            warning=_(b"warning: '%s' uses revlog format %d; expected %d")
            % (self.indexfile, version, state[b'expectedversion'])
        )

    state[b'skipread'] = set()
    state[b'safe_renamed'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta: file content starts with b'\1\n', the metadata
        #         header defined in filelog.py, but without a rename
        #   ext: content stored externally
        #
        # More formally, their differences are shown below:
        #
        # | common | rename | meta  | ext
        # -------------------------------------------------------
        # flags()             | 0      | 0      | 0     | not 0
        # renamed()           | False  | True   | False | ?
        # rawtext[0:2]=='\1\n'| False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "rawdata(rev)". "text"
        # mentioned below is "revision(rev)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see flagutil.addflagprocessor).
        #
        # | common  | rename | meta  | ext
        # -------------------------------------------------
        # rawsize()     | L1      | L1     | L1    | L1
        # size()        | L1      | L2-LM  | L1(*) | L1 (?)
        # len(rawtext)  | L2      | L2     | L2    | L2
        # len(text)     | L2      | L2     | L2    | L3
        # len(read())   | L2      | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).

        try:
            skipflags = state.get(b'skipflags', 0)
            if skipflags:
                # Only skip flags actually present on this revision.
                skipflags &= self.flags(rev)

            _verify_revision(self, skipflags, state, node)

            l1 = self.rawsize(rev)
            l2 = len(self.rawdata(node))

            if l1 != l2:
                yield revlogproblem(
                    error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                    node=node,
                )

        except error.CensoredNodeError:
            # Censored data is only a problem when the caller asked for
            # it to be reported as one.
            if state[b'erroroncensored']:
                yield revlogproblem(
                    error=_(b'censored file data'), node=node
                )
            state[b'skipread'].add(node)
        except Exception as e:
            yield revlogproblem(
                error=_(b'unpacking %s: %s')
                % (short(node), stringutil.forcebytestr(e)),
                node=node,
            )
            state[b'skipread'].add(node)
3042
3042
def storageinfo(
    self,
    exclusivefiles=False,
    sharedfiles=False,
    revisionscount=False,
    trackedsize=False,
    storedsize=False,
):
    """Collect the requested pieces of storage information.

    Only categories whose keyword flag is set are computed; the result
    is a dict keyed by the byte-string category name.
    """
    info = {}

    if exclusivefiles:
        owned = [(self.opener, self.indexfile)]
        if not self._inline:
            owned.append((self.opener, self.datafile))
        info[b'exclusivefiles'] = owned

    if sharedfiles:
        # revlogs do not share storage with other stores
        info[b'sharedfiles'] = []

    if revisionscount:
        info[b'revisionscount'] = len(self)

    if trackedsize:
        info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        sizes = (self.opener.stat(path).st_size for path in self.files())
        info[b'storedsize'] = sum(sizes)

    return info
3073
3073
def rewrite_sidedata(self, helpers, startrev, endrev):
    """Regenerate sidedata for revisions ``startrev``..``endrev`` inclusive.

    The new sidedata blobs are appended to the data file and the affected
    index entries are rewritten in place to point at them. Only supported
    for non-inline REVLOGV2 revlogs; a no-op when the format does not
    support sidedata or when ``helpers`` neither generates nor removes
    anything.
    """
    if self.version & 0xFFFF != REVLOGV2:
        return
    # inline are not yet supported because they suffer from an issue when
    # rewriting them (since it's not an append-only operation).
    # See issue6485.
    assert not self._inline
    if not helpers[1] and not helpers[2]:
        # Nothing to generate or remove
        return

    # The changelog implements a "delayed" writing mechanism that assumes
    # all index data is written in append mode and is therefore
    # incompatible with the seeked write done in this method. The use of
    # such "delayed" writing will soon be removed for revlog versions that
    # support sidedata, so for now, we only keep this simple assert to
    # highlight the situation.
    delayed = getattr(self, '_delayed', False)
    diverted = getattr(self, '_divert', False)
    if delayed and not diverted:
        msg = "cannot rewrite_sidedata of a delayed revlog"
        raise error.ProgrammingError(msg)

    new_entries = []
    # append the new sidedata
    with self._datafp(b'a+') as fp:
        # Maybe this bug still exists, see revlog._writeentry
        fp.seek(0, os.SEEK_END)
        current_offset = fp.tell()
        for rev in range(startrev, endrev + 1):
            entry = self.index[rev]
            new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                store=self,
                sidedata_helpers=helpers,
                sidedata={},
                rev=rev,
            )

            serialized_sidedata = sidedatautil.serialize_sidedata(
                new_sidedata
            )
            # entry[8]/entry[9] are the existing sidedata offset/length.
            if entry[8] != 0 or entry[9] != 0:
                # rewriting entries that already have sidedata is not
                # supported yet, because it introduces garbage data in the
                # revlog.
                msg = b"Rewriting existing sidedata is not supported yet"
                raise error.Abort(msg)

            # Apply (potential) flags to add and to remove after running
            # the sidedata helpers
            new_offset_flags = entry[0] | flags[0] & ~flags[1]
            entry = (new_offset_flags,) + entry[1:8]
            entry += (current_offset, len(serialized_sidedata))

            fp.write(serialized_sidedata)
            new_entries.append(entry)
            current_offset += len(serialized_sidedata)

    # rewrite the new index entries
    with self._indexfp(b'r+') as fp:
        fp.seek(startrev * self.index.entry_size)
        for i, e in enumerate(new_entries):
            rev = startrev + i
            self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
            packed = self.index.entry_binary(rev)
            if rev == 0:
                # The first entry carries the revlog header.
                header = self.index.pack_header(self.version)
                packed = header + packed
            fp.write(packed)
General Comments 0
You need to be logged in to leave comments. Login now