revlog: use `_writing` in `rewrite_sidedata`...
marmoute
r47992:b3acefde default
@@ -1,3206 +1,3205 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
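
# Illustrative example (added commentary, not from the original source):
# the index packs a revision's data offset and its 16 flag bits into one
# integer, so offset_type(1024, REVIDX_ISCENSORED) yields
# (1024 << 16) | REVIDX_ISCENSORED, and the start()/flags() accessors
# further down unpack it with `>> 16` and `& 0xFFFF`.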


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
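        # Illustrative note (added): `x & (x - 1)` clears the lowest set bit,
        # so the expression above is zero exactly when the cache size is a
        # power of two, e.g. 65536 & 65535 == 0 while 65537 & 65536 != 0.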
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        return util.buffer(util.mmapread(fp))
                return fp.read()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
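
    # Illustrative note (added): with mmap_threshold=65536 a 1 MiB index
    # would be mmapped while a 4 KiB one would be read outright; a missing
    # file simply yields b'' instead of raising ENOENT.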

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is None:
            entry_point = b'%s.i' % self.radix
        else:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        if self._format_version == REVLOGV0:
            if self._format_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            self._inline = False
            self._generaldelta = False

        elif self._format_version == REVLOGV1:
            if self._format_flags & ~REVLOGV1_FLAGS:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            self._inline = self._format_flags & FLAG_INLINE_DATA
            self._generaldelta = self._format_flags & FLAG_GENERALDELTA

        elif self._format_version == REVLOGV2:
            if self._format_flags & ~REVLOGV2_FLAGS:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            # There is a bug in the transaction handling when going from an
            # inline revlog to a separate index and data file. Turn it off until
            # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
            # See issue6485
            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)

        index_data = entry_data
        self._indexfile = entry_point

        if self.postfix is None or self.postfix == b'a':
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {'mode': mode}
        if mode != b'r':
            args['checkambig'] = self._checkambig
        if mode == b'w':
            args['atomictemp'] = True
        return self.opener(self._indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
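
    # A minimal reader sketch (added, hypothetical): callers typically use
    # the context manager to fetch a raw span of the data file, e.g.
    #
    #     with self._datareadfp() as fp:
    #         fp.seek(offset)
    #         data = fp.read(length)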

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]
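
    # Illustrative note (added): parentrevs() keeps a real parent first, so
    # an index entry storing (nullrev, 5) is reported as (5, nullrev).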

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
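
    # Illustrative note (added): with generaldelta each step follows the
    # stored base rev (e[3]); without it the chain is implicitly rev - 1,
    # rev - 2, ... until a revision that is its own base (a full snapshot).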
916
916
917 def ancestors(self, revs, stoprev=0, inclusive=False):
917 def ancestors(self, revs, stoprev=0, inclusive=False):
918 """Generate the ancestors of 'revs' in reverse revision order.
918 """Generate the ancestors of 'revs' in reverse revision order.
919 Does not generate revs lower than stoprev.
919 Does not generate revs lower than stoprev.
920
920
921 See the documentation for ancestor.lazyancestors for more details."""
921 See the documentation for ancestor.lazyancestors for more details."""
922
922
923 # first, make sure start revisions aren't filtered
923 # first, make sure start revisions aren't filtered
924 revs = list(revs)
924 revs = list(revs)
925 checkrev = self.node
925 checkrev = self.node
926 for r in revs:
926 for r in revs:
927 checkrev(r)
927 checkrev(r)
928 # and we're sure ancestors aren't filtered as well
928 # and we're sure ancestors aren't filtered as well
929
929
930 if rustancestor is not None:
930 if rustancestor is not None:
931 lazyancestors = rustancestor.LazyAncestors
931 lazyancestors = rustancestor.LazyAncestors
932 arg = self.index
932 arg = self.index
933 else:
933 else:
934 lazyancestors = ancestor.lazyancestors
934 lazyancestors = ancestor.lazyancestors
935 arg = self._uncheckedparentrevs
935 arg = self._uncheckedparentrevs
936 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
936 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
937
937
938 def descendants(self, revs):
938 def descendants(self, revs):
939 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
939 return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
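
    # Example sketch (illustrative, not part of the original source; the node
    # variables are hypothetical):
    #
    #   has, missing = rl.findcommonmissing([commonnode], [headnode])
    #   # ``has`` answers membership for ::common lazily; ``missing`` is the
    #   # topologically sorted node list for (::heads) - (::common), i.e.
    #   # what a peer that has ``commonnode`` still lacks.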

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
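
    # Incremental usage sketch (illustrative, not part of the original source;
    # assumes the ``addbases`` API of ancestor.incrementalmissingancestors):
    #
    #   inc = rl.incrementalmissingrevs(common=[commonrev])
    #   missing = inc.missingancestors([headrev])  # ::headrev - ::commonrev
    #   inc.addbases([newlycommonrev])             # grow the common set later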

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
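
    # Relationship sketch (illustrative, not part of the original source):
    # ``findmissing`` is the node-level twin of ``findmissingrevs``.
    #
    #   missingrevs = rl.findmissingrevs(common=[c], heads=[h])
    #   missingnodes = rl.findmissing(
    #       common=[rl.node(c)], heads=[rl.node(h)]
    #   )
    #   assert missingnodes == [rl.node(r) for r in missingrevs]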

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
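
    # Usage sketch (illustrative, not part of the original source):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([rootnode], [headnode])
    #   # ``nodes`` lists every node on a path from ``rootnode`` to
    #   # ``headnode`` in topological order; both endpoints are included
    #   # because a node is its own ancestor and descendant.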

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so no rev is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]
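
    # Example sketch (illustrative, not part of the original source):
    #
    #   rl.heads()                   # every childless node
    #   rl.heads(start=somenode)     # only heads descending from somenode
    #   rl.heads(stop=[cutnode])     # revs from cutnode act as childless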

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
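
    # The three short-circuits above settle most calls; only the general case
    # pays for a reachability walk. Illustrative consequences (not part of the
    # original source):
    #
    #   rl.isancestorrev(r, r)   # True: a rev is its own ancestor
    #   rl.isancestorrev(b, a)   # False for any b > a, with no walk at all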

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))
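
    # Resolution order sketch (illustrative, not part of the original source):
    # exact forms are tried first via ``_match``, then unique-prefix
    # resolution via ``_partialmatch``.
    #
    #   rl.lookup(5)         # integer rev -> node
    #   rl.lookup(b'5')      # str(rev) -> node
    #   rl.lookup(b'a1b2c')  # unambiguous hex prefix -> node
    #                        # anything else -> LookupError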

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
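
    # Example sketch (illustrative, not part of the original source):
    #
    #   prefix = rl.shortest(node)
    #   assert rl.lookup(prefix) == node  # the prefix is unambiguous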

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data
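
    # Worked example (illustrative, not part of the original source): with a
    # cached window (o=0, len(d)=4096), caching 1024 bytes at offset 4096 is
    # contiguous (0 + 4096 == 4096), so the window grows to 5120 bytes;
    # caching at offset 8192 is not contiguous, so the window is replaced.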

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
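
    # Worked example of the window alignment above (illustrative, not part of
    # the original source): with cachesize = 65536, reading 100 bytes at
    # offset 70000 gives realoffset = 70000 & ~65535 = 65536 and
    # reallength = ((70100 + 65536) & ~65535) - 65536 = 65536, so one aligned
    # 64KiB block around the requested range is read and cached.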

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
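
    # Example sketch (illustrative, not part of the original source):
    #
    #   p = rl.deltaparent(rev)
    #   if p == nullrev:
    #       ...  # rev is stored as a full snapshot; nothing to apply
    #   else:
    #       ...  # rev's chunk is a delta against revision p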

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
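
    # Consequence sketch (illustrative, not part of the original source):
    # under sparse-revlog a snapshot's delta base is never one of its
    # parents, so the two notions can be combined:
    #
    #   if rl.issnapshot(rev) and rl.deltaparent(rev) != nullrev:
    #       ...  # rev is an intermediate snapshot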

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
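
    # Fast-path sketch (illustrative, not part of the original source): when
    # rev2 is already stored as a delta against rev1, the stored chunk is
    # returned as-is; otherwise a binary diff of the raw texts is computed.
    # Either way the defining property is:
    #
    #   delta = rl.revdiff(rev1, rev2)
    #   assert mdiff.patch(rl.rawdata(rev1), delta) == rl.rawdata(rev2)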

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]
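
    # Usage sketch (illustrative, not part of the original source):
    #
    #   text = rl.revision(node)   # checked, flag-processed revision text
    #   raw = rl.rawdata(node)     # the bytes exactly as stored
    #   extra = rl.sidedata(node)  # {} when the revlog carries no sidedata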
1804
1804
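    # Sketch, assuming a sidedata-capable revlog ``rl``: the returned mapping
    # is a plain dict keyed by sidedata key constants, and it is empty when
    # the revision carries no sidedata.
    #
    #   sd = rl.sidedata(node)   # e.g. {} or {key: b'serialized value'}
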
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

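    # Sketch of the reconstruction performed above: a delta chain is a
    # full-text base followed by binary deltas, folded together with
    # ``mdiff.patches`` (``rl`` is a hypothetical instance):
    #
    #   chain, stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
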
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

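    # A minimal sketch of the default SHA-1 node scheme implemented by
    # ``storageutil.hashrevisionsha1``: the parents are sorted first, so the
    # result does not depend on their order.
    #
    #   import hashlib
    #
    #   def nodehash(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
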
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp(b'r') as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.opener(self._indexfile, mode=b'w', atomictemp=True) as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self._indexfp(b"r+")
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    dfh.seek(0, os.SEEK_END)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                try:
                    ifh = self._indexfp(b"r+")
                    ifh.seek(0, os.SEEK_END)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    ifh = self._indexfp(b"w+")
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

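    # Expected calling pattern, sketched with a hypothetical revlog ``rl``
    # and transaction ``tr``: all low-level writes happen inside ``_writing``
    # so the index/data handles are shared; a nested entry simply yields.
    #
    #   with rl._writing(tr):
    #       ifh, dfh = rl._writinghandles  # open for the whole block
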
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

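    # Illustrative call with hypothetical values: the node is derived from
    # the content and parents unless explicitly provided.
    #
    #   rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #   node = rl.node(rev)
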
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

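    # Sketch of the chunk headers produced above and routed by ``decompress``
    # below: the first byte of a stored chunk selects the codec.
    #
    #   b'x...'  -> zlib-compressed payload (zlib streams begin with 'x')
    #   b'u...'  -> uncompressed payload stored after an explicit 'u' marker
    #   b'\0...' -> uncompressed payload that already begins with '\0'
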
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

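    # Reader's sketch of the index tuple appended above (the authoritative
    # layout lives with the index implementation):
    #
    #   (offset_and_flags, compressed_len, raw_len, delta_base_rev,
    #    linkrev, p1_rev, p2_rev, node, sidedata_offset, sidedata_len)
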
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

2414
2414
2415 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2415 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2416 # Files opened in a+ mode have inconsistent behavior on various
2416 # Files opened in a+ mode have inconsistent behavior on various
2417 # platforms. Windows requires that a file positioning call be made
2417 # platforms. Windows requires that a file positioning call be made
2418 # when the file handle transitions between reads and writes. See
2418 # when the file handle transitions between reads and writes. See
2419 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2419 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2420 # platforms, Python or the platform itself can be buggy. Some versions
2420 # platforms, Python or the platform itself can be buggy. Some versions
2421 # of Solaris have been observed to not append at the end of the file
2421 # of Solaris have been observed to not append at the end of the file
2422 # if the file was seeked to before the end. See issue4943 for more.
2422 # if the file was seeked to before the end. See issue4943 for more.
2423 #
2423 #
2424 # We work around this issue by inserting a seek() before writing.
2424 # We work around this issue by inserting a seek() before writing.
2425 # Note: This is likely not necessary on Python 3. However, because
2425 # Note: This is likely not necessary on Python 3. However, because
2426 # the file handle is reused for reads and may be seeked there, we need
2426 # the file handle is reused for reads and may be seeked there, we need
2427 # to be careful before changing this.
2427 # to be careful before changing this.
2428 if self._writinghandles is None:
2428 if self._writinghandles is None:
2429 msg = b'adding revision outside `revlog._writing` context'
2429 msg = b'adding revision outside `revlog._writing` context'
2430 raise error.ProgrammingError(msg)
2430 raise error.ProgrammingError(msg)
2431 ifh, dfh = self._writinghandles
2431 ifh, dfh = self._writinghandles
2432 ifh.seek(0, os.SEEK_END)
2432 ifh.seek(0, os.SEEK_END)
2433 if dfh:
2433 if dfh:
2434 dfh.seek(0, os.SEEK_END)
2434 dfh.seek(0, os.SEEK_END)
2435
2435
2436 curr = len(self) - 1
2436 curr = len(self) - 1
2437 if not self._inline:
2437 if not self._inline:
2438 transaction.add(self._datafile, offset)
2438 transaction.add(self._datafile, offset)
2439 transaction.add(self._indexfile, curr * len(entry))
2439 transaction.add(self._indexfile, curr * len(entry))
2440 if data[0]:
2440 if data[0]:
2441 dfh.write(data[0])
2441 dfh.write(data[0])
2442 dfh.write(data[1])
2442 dfh.write(data[1])
2443 if sidedata:
2443 if sidedata:
2444 dfh.write(sidedata)
2444 dfh.write(sidedata)
2445 ifh.write(entry)
2445 ifh.write(entry)
2446 else:
2446 else:
2447 offset += curr * self.index.entry_size
2447 offset += curr * self.index.entry_size
2448 transaction.add(self._indexfile, offset)
2448 transaction.add(self._indexfile, offset)
2449 ifh.write(entry)
2449 ifh.write(entry)
2450 ifh.write(data[0])
2450 ifh.write(data[0])
2451 ifh.write(data[1])
2451 ifh.write(data[1])
2452 if sidedata:
2452 if sidedata:
2453 ifh.write(sidedata)
2453 ifh.write(sidedata)
2454 self._enforceinlinesize(transaction)
2454 self._enforceinlinesize(transaction)
2455 nodemaputil.setup_persistent_nodemap(transaction, self)
2455 nodemaputil.setup_persistent_nodemap(transaction, self)
2456
2456
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

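    # Shape of one ``deltas`` element as unpacked above (values are
    # hypothetical): ``delta`` is a binary patch against ``deltabase``.
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
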
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

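    # Illustrative strip flow with a hypothetical transaction ``tr``: inspect
    # the truncation point first, then let ``strip`` rewind the files.
    #
    #   striprev, brokenrevs = rl.getstrippoint(minlink)
    #   rl.strip(minlink, tr)   # truncates the .i/.d files at ``striprev``
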
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

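    # Illustrative use of these policies with ``clone`` (defined below);
    # ``src``, ``dst`` and ``tr`` are hypothetical. This forces every delta
    # to be recomputed while copying, e.g. after a delta algorithm change:
    #
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
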
2729 def clone(
2729 def clone(
2730 self,
2730 self,
2731 tr,
2731 tr,
2732 destrevlog,
2732 destrevlog,
2733 addrevisioncb=None,
2733 addrevisioncb=None,
2734 deltareuse=DELTAREUSESAMEREVS,
2734 deltareuse=DELTAREUSESAMEREVS,
2735 forcedeltabothparents=None,
2735 forcedeltabothparents=None,
2736 sidedata_helpers=None,
2736 sidedata_helpers=None,
2737 ):
2737 ):
2738 """Copy this revlog to another, possibly with format changes.
2738 """Copy this revlog to another, possibly with format changes.
2739
2739
2740 The destination revlog will contain the same revisions and nodes.
2740 The destination revlog will contain the same revisions and nodes.
2741 However, it may not be bit-for-bit identical due to e.g. delta encoding
2741 However, it may not be bit-for-bit identical due to e.g. delta encoding
2742 differences.
2742 differences.
2743
2743
2744 The ``deltareuse`` argument control how deltas from the existing revlog
2744 The ``deltareuse`` argument control how deltas from the existing revlog
2745 are preserved in the destination revlog. The argument can have the
2745 are preserved in the destination revlog. The argument can have the
2746 following values:
2746 following values:
2747
2747
2748 DELTAREUSEALWAYS
2748 DELTAREUSEALWAYS
2749 Deltas will always be reused (if possible), even if the destination
2749 Deltas will always be reused (if possible), even if the destination
2750 revlog would not select the same revisions for the delta. This is the
2750 revlog would not select the same revisions for the delta. This is the
2751 fastest mode of operation.
2751 fastest mode of operation.
2752 DELTAREUSESAMEREVS
2752 DELTAREUSESAMEREVS
2753 Deltas will be reused if the destination revlog would pick the same
2753 Deltas will be reused if the destination revlog would pick the same
2754 revisions for the delta. This mode strikes a balance between speed
2754 revisions for the delta. This mode strikes a balance between speed
2755 and optimization.
2755 and optimization.
2756 DELTAREUSENEVER
2756 DELTAREUSENEVER
2757 Deltas will never be reused. This is the slowest mode of execution.
2757 Deltas will never be reused. This is the slowest mode of execution.
2758 This mode can be used to recompute deltas (e.g. if the diff/delta
2758 This mode can be used to recompute deltas (e.g. if the diff/delta
2759 algorithm changes).
2759 algorithm changes).
2760 DELTAREUSEFULLADD
2760 DELTAREUSEFULLADD
2761 Revision will be re-added as if their were new content. This is
2761 Revision will be re-added as if their were new content. This is
2762 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2762 slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
2763 eg: large file detection and handling.
2763 eg: large file detection and handling.
2764
2764
2765 Delta computation can be slow, so the choice of delta reuse policy can
2765 Delta computation can be slow, so the choice of delta reuse policy can
2766 significantly affect run time.
2766 significantly affect run time.
2767
2767
2768 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2768 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2769 two extremes. Deltas will be reused if they are appropriate. But if the
2769 two extremes. Deltas will be reused if they are appropriate. But if the
2770 delta could choose a better revision, it will do so. This means if you
2770 delta could choose a better revision, it will do so. This means if you
2771 are converting a non-generaldelta revlog to a generaldelta revlog,
2771 are converting a non-generaldelta revlog to a generaldelta revlog,
2772 deltas will be recomputed if the delta's parent isn't a parent of the
2772 deltas will be recomputed if the delta's parent isn't a parent of the
2773 revision.
2773 revision.
2774
2774
2775 In addition to the delta policy, the ``forcedeltabothparents``
2775 In addition to the delta policy, the ``forcedeltabothparents``
2776 argument controls whether to force compute deltas against both parents
2776 argument controls whether to force compute deltas against both parents
2777 for merges. By default, the current default is used.
2777 for merges. By default, the current default is used.
2778
2778
2779 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2779 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2780 `sidedata_helpers`.
2780 `sidedata_helpers`.
2781 """
2781 """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

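    # A minimal usage sketch, with hypothetical ``srclog``/``dstlog`` names
    # and an empty destination revlog; DELTAREUSENEVER forces every delta to
    # be recomputed, as an upgrade that changes the delta algorithm would
    # want:
    #
    #     with repo.lock(), repo.transaction(b'clone-revlog') as tr:
    #         srclog.clone(tr, dstlog, deltareuse=srclog.DELTAREUSENEVER)
    #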
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
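            # For reference, inferred from the accesses below: an index entry
            # packs offset+flags in entry[0], the linkrev in entry[4], the
            # parent revisions in entry[5]/entry[6], and the node in entry[7]
            # (entry[8]/entry[9], used by rewrite_sidedata further down,
            # locate the sidedata).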
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
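                    # ``&`` binds tighter than ``|``, so this computes
                    # flags | (new_flags[0] & ~new_flags[1]): OR in the flags
                    # to add (new_flags[0]), minus any that are also marked
                    # for removal (new_flags[1]).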
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

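    # A minimal usage sketch (hypothetical names, assuming an open repo and
    # the censor machinery around this method):
    #
    #     with repo.lock(), repo.transaction(b'censor') as tr:
    #         flog = repo.file(b'path/to/secret')
    #         flog.censorrevision(tr, badnode, tombstone=b'redacted')
    #
    # Per the strategy comment above, the rewritten revlog only replaces the
    # original when the transaction closes.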
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

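    # A sketch of driving the verifier; the ``state`` keys mirror the reads
    # and writes above (``rl`` and ``ui`` are hypothetical):
    #
    #     state = {
    #         b'expectedversion': rl._format_version,
    #         b'erroroncensored': True,
    #     }
    #     for problem in rl.verifyintegrity(state):
    #         ui.warn(problem.warning or problem.error)
    #
    # Afterwards, state[b'skipread'] holds the nodes whose content could not
    # be verified.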
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

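    # For instance, asking only for size information (``rl`` hypothetical)
    # returns a dict with just those keys:
    #
    #     info = rl.storageinfo(trackedsize=True, storedsize=True)
    #     # info == {b'trackedsize': ..., b'storedsize': ...}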
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # Inline revlogs are not yet supported because they suffer from an
        # issue when rewriting them (since it's not an append-only
        # operation). See issue6485.
        assert not self._inline
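        # (``helpers`` is the 3-tuple described in
        # revlogutils.sidedata.get_sidedata_helpers; helpers[1] and
        # helpers[2] appear to carry the per-category sidedata computers and
        # removers, hence the emptiness check below.)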
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        # The changelog implements a "delayed" writing mechanism that assumes
        # all index data is written in append mode, and that is therefore
        # incompatible with the seeked writes done in this method. The use of
        # such "delayed" writing will soon be removed for revlog versions
        # that support side data, so for now we only keep this simple assert
        # to highlight the situation.
        delayed = getattr(self, '_delayed', False)
        diverted = getattr(self, '_divert', False)
        if delayed and not diverted:
            msg = "cannot rewrite_sidedata of a delayed revlog"
            raise error.ProgrammingError(msg)

        new_entries = []
        # append the new sidedata
-        with self._datafp(b'a+') as dfh:
-            # Maybe this bug still exists, see revlog._writeentry
+        with self._writing(transaction):
+            ifh, dfh = self._writinghandles
            dfh.seek(0, os.SEEK_END)
            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"Rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

-        # rewrite the new index entries
-        with self._indexfp(b'r+') as ifh:
-            fp.seek(startrev * self.index.entry_size)
+            # rewrite the new index entries
+            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)