revlog: simplify entry update logic in `rewrite_sidedata`...
marmoute
r48019:de63be07 default
@@ -1,3264 +1,3267 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)


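# Editor's sketch (illustrative, not part of the upstream module): the value
# built by offset_type() packs the data-file offset into the high bits and
# the 16 flag bits into the low bits.  Assuming a revision stored at offset
# 4096 with no special flags:
#
#     packed = offset_type(4096, 0)             # == 4096 << 16
#     offset, flags = packed >> 16, packed & 0xFFFF
#     assert (offset, flags) == (4096, 0)
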
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


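# Editor's note (illustrative, not upstream code): the parse_index_v* helpers
# above share one signature -- raw index bytes plus an ``inline`` flag in,
# an ``(index, cache)`` pair out, e.g.:
#
#     index, cache = parse_index_v1(index_data, False)
#
# Which parser is used is decided in ``revlog._loadindex()`` below, based on
# the on-disk format version.
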
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance measurement code might
        not set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

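    # Editor's sketch (illustrative only): the option names read above come
    # from ``self.opener.options``, a plain dict that the repository fills in
    # from its config and requirements.  For instance, assuming an opener
    # carrying
    #
    #     opener.options = {b'revlogv1': True, b'generaldelta': True}
    #
    # ``_init_opts()`` would return a ``new_header`` equal to
    # ``REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA``.
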
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

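    # Editor's sketch (illustrative only): the 4-byte header parsed above
    # keeps the format version in its low 16 bits and the feature flags in
    # the high bits, so for a v1 inline revlog:
    #
    #     header = REVLOGV1 | FLAG_INLINE_DATA
    #     assert header & 0xFFFF == REVLOGV1             # _format_version
    #     assert (header & ~0xFFFF) == FLAG_INLINE_DATA  # _format_flags
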
    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

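    # Editor's note (illustrative only): ``_datareadfp`` is a context
    # manager, so revlog data is read through a ``with`` block such as
    #
    #     with self._datareadfp() as fp:
    #         fp.seek(start)
    #         data = fp.read(length)
    #
    # which transparently reuses an active write handle when one exists.
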
    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

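    # Editor's sketch (illustrative only) of the accessors above: for an
    # index entry ``e = self.index[rev]``, ``e[0] >> 16`` is the start
    # offset, ``e[0] & 0xFFFF`` holds the flags, and ``e[1]`` is the
    # compressed chunk length, so ``end = (e[0] >> 16) + e[1]``.
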
    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

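    # Editor's note (illustrative only): the swap in ``parentrevs`` above
    # normalizes parent order so the null parent is reported second, e.g. an
    # entry storing parents (nullrev, 5) is returned as (5, nullrev).
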
860 # fast parentrevs(rev) where rev isn't filtered
860 # fast parentrevs(rev) where rev isn't filtered
861 _uncheckedparentrevs = parentrevs
861 _uncheckedparentrevs = parentrevs
862
862
863 def node(self, rev):
863 def node(self, rev):
864 try:
864 try:
865 return self.index[rev][7]
865 return self.index[rev][7]
866 except IndexError:
866 except IndexError:
867 if rev == wdirrev:
867 if rev == wdirrev:
868 raise error.WdirUnsupported
868 raise error.WdirUnsupported
869 raise
869 raise
870
870
871 # Derived from index values.
871 # Derived from index values.
872
872
873 def end(self, rev):
873 def end(self, rev):
874 return self.start(rev) + self.length(rev)
874 return self.start(rev) + self.length(rev)
875
875
876 def parents(self, node):
876 def parents(self, node):
877 i = self.index
877 i = self.index
878 d = i[self.rev(node)]
878 d = i[self.rev(node)]
879 # inline node() to avoid function call overhead
879 # inline node() to avoid function call overhead
880 if d[5] == self.nullid:
880 if d[5] == self.nullid:
881 return i[d[6]][7], i[d[5]][7]
881 return i[d[6]][7], i[d[5]][7]
882 else:
882 else:
883 return i[d[5]][7], i[d[6]][7]
883 return i[d[5]][7], i[d[6]][7]
884
884
885 def chainlen(self, rev):
885 def chainlen(self, rev):
886 return self._chaininfo(rev)[0]
886 return self._chaininfo(rev)[0]
887
887
888 def _chaininfo(self, rev):
888 def _chaininfo(self, rev):
889 chaininfocache = self._chaininfocache
889 chaininfocache = self._chaininfocache
890 if rev in chaininfocache:
890 if rev in chaininfocache:
891 return chaininfocache[rev]
891 return chaininfocache[rev]
892 index = self.index
892 index = self.index
893 generaldelta = self._generaldelta
893 generaldelta = self._generaldelta
894 iterrev = rev
894 iterrev = rev
895 e = index[iterrev]
895 e = index[iterrev]
896 clen = 0
896 clen = 0
897 compresseddeltalen = 0
897 compresseddeltalen = 0
898 while iterrev != e[3]:
898 while iterrev != e[3]:
899 clen += 1
899 clen += 1
900 compresseddeltalen += e[1]
900 compresseddeltalen += e[1]
901 if generaldelta:
901 if generaldelta:
902 iterrev = e[3]
902 iterrev = e[3]
903 else:
903 else:
904 iterrev -= 1
904 iterrev -= 1
905 if iterrev in chaininfocache:
905 if iterrev in chaininfocache:
906 t = chaininfocache[iterrev]
906 t = chaininfocache[iterrev]
907 clen += t[0]
907 clen += t[0]
908 compresseddeltalen += t[1]
908 compresseddeltalen += t[1]
909 break
909 break
910 e = index[iterrev]
910 e = index[iterrev]
911 else:
911 else:
912 # Add text length of base since decompressing that also takes
912 # Add text length of base since decompressing that also takes
913 # work. For cache hits the length is already included.
913 # work. For cache hits the length is already included.
914 compresseddeltalen += e[1]
914 compresseddeltalen += e[1]
915 r = (clen, compresseddeltalen)
915 r = (clen, compresseddeltalen)
916 chaininfocache[rev] = r
916 chaininfocache[rev] = r
917 return r
917 return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
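    # Hypothetical usage sketch (values assumed for illustration): with the
    # same chain as above, where rev 7 deltas against 6 and 6 against a
    # snapshot at 5:
    #
    #   rl._deltachain(7)             # ([5, 6, 7], False)
    #   rl._deltachain(7, stoprev=6)  # ([7], True): stopped before the base
    #
    # Callers replay the chain in ascending order, applying each delta on top
    # of the previous text.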

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
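    # Worked example (hypothetical graph, assumed for illustration): for a
    # linear history 0 -> 1 -> 2 -> 3 with common=[node(1)] and
    # heads=[node(3)]:
    #
    #   has, missing = rl.findcommonmissing([rl.node(1)], [rl.node(3)])
    #   # 0 in has, 1 in has             (the '::common' side, plus nullrev)
    #   # missing == [node(2), node(3)]  ('::heads - ::common', sorted by rev)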

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
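    # Usage sketch (hedged: only the call visible elsewhere in this file is
    # shown): the returned object can answer repeated "what is still missing"
    # queries against the same 'common' set without recomputing from scratch:
    #
    #   inc = rl.incrementalmissingrevs(common=[c1, c2])
    #   revs = inc.missingancestors(headrevs)  # (::headrevs) - (::common)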

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
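    # Worked example (hypothetical graph, assumed for illustration): for
    #
    #   0 -> 1 -> 2 -> 3
    #         \
    #          4
    #
    # rl.nodesbetween(roots=[node(1)], heads=[node(3)]) returns
    # ([node(1), node(2), node(3)], [node(1)], [node(3)]): rev 4 is a
    # descendant of the root but not an ancestor of any requested head, so
    # it is excluded.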

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
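    # A minimal sketch of the trick above (assumed linear history 0..3 plus a
    # branch 1 -> 4): every rev first marks itself as a potential head, then
    # clears its parents, so after the loop only childless revs remain set:
    #
    #   ishead after the loop: rev 0 -> 0, 1 -> 0, 2 -> 0, 3 -> 1, 4 -> 1
    #   _headrevs() == [3, 4]
    #
    # The extra slot (count + 1) absorbs writes for nullrev parents, since
    # ishead[nullrev] aliases the final element rather than a real revision.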

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
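    # Hedged sketch of why the reachableroots call answers the question: with
    # roots=[a] and heads=[b], reachableroots returns a non-empty set exactly
    # when some path b -> ... -> a exists, i.e. when a is an ancestor of b.
    # The a > b fast path is safe because revision numbers grow topologically:
    # an ancestor always has a smaller rev than its descendants.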

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

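    # Hypothetical example (node values assumed): if b'a1b2...' and b'a1c3...'
    # are the only stored nodes sharing the prefix b'a1', then
    # rl.shortest(node_a1b2) == b'a1b' with the default minlength=1, while
    # rl.shortest(node_a1b2, minlength=6) starts probing at six hex digits
    # and so returns the six-digit prefix.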
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data
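    # A minimal sketch of the append-vs-replace rule above (offsets assumed):
    # if the cache currently holds (o=0, 4096 bytes) and a segment arrives at
    # offset 4096, the new bytes are contiguous and the cached run grows; a
    # segment at offset 65536 is not contiguous, so the cache is simply
    # replaced with the new segment.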

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d
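    # Worked arithmetic for the window alignment above (cachesize assumed to
    # be 65536, a power of two, as the bit masking requires): a request for
    # offset=70000, length=100 yields
    #
    #   realoffset = 70000 & ~65535                          == 65536
    #   reallength = ((70100 + 65536) & ~65535) - realoffset == 65536
    #
    # so one aligned 64 KiB read serves the request and primes the cache for
    # neighboring reads in both directions.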

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)
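    # Sketch of the inline adjustment above (sizes assumed for illustration):
    # in an inline revlog, each revision's data is interleaved with its index
    # entry, so the data for rev r starts (r + 1) * entry_size bytes later
    # than the bare data offset suggests. E.g. with entry_size == 64, rev 2's
    # data recorded at data offset 1000 actually lives at
    # 1000 + 3 * 64 == 1192 in the file.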

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
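    # Hedged sketch of the batching above: for revs [10, 11, 12] stored
    # contiguously, one _getsegmentforrevs(10, 12) read replaces three
    # separate reads, and each rev's chunk is then sliced out of the shared
    # buffer at start(rev) - offset. With sparse-read enabled,
    # deltautil.slicechunk first splits revs into densely packed groups so
    # that a single huge read is not wasted spanning large gaps.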

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1
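    # Minimal sketch (hypothetical index contents): entry[3] stores the delta
    # base. If rev 9's entry records base 9 itself, rev 9 is a full snapshot
    # and deltaparent(9) == nullrev. Otherwise, with generaldelta the stored
    # base is returned as-is, while legacy revlogs always delta against the
    # previous revision, hence the rev - 1 fallback.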

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
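    # Hedged note on the recursion above: under sparse-revlog, a revision that
    # deltas against one of its own parents is an ordinary delta, while a
    # revision whose base is unrelated can be an intermediate snapshot; it
    # counts as a snapshot exactly when its base (recursively) is one.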

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)
1821
1821
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

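    # Reconstruction sketch (illustrative): ``_deltachain`` returns the
    # revisions whose chunks must be stacked to rebuild ``rev``; the first
    # chunk is the base text unless the cached revision stopped the walk:
    #
    #   chain, stopped = rl._deltachain(rev, stoprev=cachedrev)
    #   # e.g. chain == [base, d1, d2, rev], and then
    #   # rawtext == patches(chunk(base), [chunk(d1), chunk(d2), chunk(rev)])
    #
    # ``rl`` is an assumed revlog instance; the names are hypothetical.
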
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

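    # Index layout reminder (descriptive): entries [8] and [9] hold the
    # sidedata offset and size, mirroring how _addrevision() stores them
    # below. For inline revlogs the data segments are interleaved with the
    # index entries, hence the ``entry_size * (1 + rev)`` correction.
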
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

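    # Hash convention (descriptive, see storageutil.hashrevisionsha1): the
    # node is roughly sha1(min(p1, p2) + max(p1, p2) + text), so a
    # revision's identity covers both its content and its parents:
    #
    #   hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
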
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

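    # Conversion sketch (descriptive): an inline revlog stores data chunks
    # inside the .i file, interleaved with index entries. Once the total
    # size crosses _maxinline, the loop above copies every chunk into a
    # fresh .d file, rewrites the index without FLAG_INLINE_DATA, and lets
    # the transaction swap the temporary index in atomically.
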
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'trying to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

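    # Usage sketch (illustrative): all mutations must happen inside this
    # context manager so the file handles and transaction bookkeeping are
    # set up, e.g.:
    #
    #   with rl._writing(tr):
    #       rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)
    #
    # ``rl`` and ``tr`` are an assumed revlog and transaction here;
    # addrawrevision() and addgroup() below wrap their work in exactly
    # this manner.
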
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

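    # Header convention (descriptive): compress() returns a (header, data)
    # pair. An empty header means the payload already carries its own
    # engine header (e.g. b'x' for zlib) or starts with b'\0' and can be
    # stored verbatim; b'u' marks data kept uncompressed that needs the
    # marker to stay distinguishable. decompress() below routes on that
    # first byte.
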
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

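    # Index entry layout (descriptive, matching the tuple built above):
    #
    #   (offset|flags, compressed length, rawtext length, delta base,
    #    linkrev, p1 rev, p2 rev, node, sidedata offset, sidedata length)
    #
    # _sidedata() and deltaparent() read fields 8/9 and 3 respectively.
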
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in
                        # a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this
                    # case. We're only using addgroup() in the context of
                    # changegroup generation so the revision data can always be
                    # handled as raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

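    # Delta group shape (descriptive, matching the unpacking above): each
    # item yielded by ``deltas`` is an 8-tuple
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # which is how changegroup application feeds revisions in without ever
    # materializing full texts; _addrevision() receives (baserev, delta)
    # as its cachedelta.
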
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
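        # A hedged usage sketch (``rl``, ``nodes`` and ``consume`` are
        # assumptions, not names defined here):
        #
        #     for delta in rl.emitrevisions(nodes, revisiondata=True):
        #         consume(delta)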
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents`` argument
        controls whether to force computing deltas against both parents for
        merges. When left unset, the destination revlog's existing setting is
        kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
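        # A minimal call sketch, assuming ``tr`` is an open transaction and
        # ``dst`` an empty destination revlog (both are assumptions, not
        # names from this module):
        #
        #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)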
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]
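                    # ``&`` binds tighter than ``|``, so this reads as
                    # flags | (new_flags[0] & ~new_flags[1]): the helpers'
                    # "add" bits are set, minus any bit flagged for removal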

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))
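                        # reuse the stored delta chunk as-is, so the
                        # destination does not have to recompute it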

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
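        # (descriptive note: packmeta wraps the tombstone in the b'\1\n'
        # metadata envelope used for file metadata, so it can stand in for
        # the censored rawtext)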

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
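        # A hedged consumption sketch (``rl`` and ``ui`` are assumptions, not
        # names defined here; a ``revlogproblem`` carries ``warning``,
        # ``error`` and ``node`` attributes):
        #
        #     for problem in rl.verifyintegrity(state):
        #         ui.warn(problem.error or problem.warning)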
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            # common: the most common case
            # rename: with a rename
            # meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            # ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

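        # A hedged usage sketch (``rl`` is an assumed open revlog instance):
        #
        #     info = rl.storageinfo(revisionscount=True, storedsize=True)
        #     print(info[b'revisionscount'], info[b'storedsize'])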
        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
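        # ``helpers`` is expected to be the (repo, computers, removers)
        # triple described in `revlogutil.sidedata.get_sidedata_helpers`;
        # the ``helpers[1]``/``helpers[2]`` checks below rely on that layout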
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                )
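                # each update is a (new sidedata offset, new sidedata length,
                # updated offset_flags) triple for an existing index entry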

                # the sidedata computation might have moved the file cursors around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
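                # ``*e`` feeds the triple built above to the index, which
                # applies it in place to the entry for ``rev``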
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)