revlog: make the `candelta` method private...

Author: marmoute
Changeset: r51897:39fa0b94 (default)
@@ -1,3546 +1,3546 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)

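
# Illustrative sketch, not part of upstream revlog.py: a flag processor is a
# (read, write, raw) triple like ellipsisprocessor above.  read and write
# return a (text, validatehash) pair, while raw reports whether the rawtext
# hash can be verified directly.  The name below is hypothetical.
def _example_noop_processor():
    def read(rl, text):
        return text, True

    def write(rl, text):
        return text, True

    def raw(rl, text):
        return True

    return (read, write, raw)
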
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

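    # Illustrative sketch, not part of upstream revlog.py: the 4-byte index
    # header packs the format flags into the high 16 bits and the revlog
    # version into the low 16 bits, which is the split is_inline_index()
    # relies on.  The helper name below is hypothetical.
    @staticmethod
    def _example_split_header(header_bytes):
        header = INDEX_HEADER.unpack(header_bytes)[0]
        format_flags = header & ~0xFFFF  # e.g. FLAG_INLINE_DATA lives here
        format_version = header & 0xFFFF  # e.g. REVLOGV1 == 1
        return format_flags, format_version
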
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as a flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

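    # Illustrative sketch, not part of upstream revlog.py: the chunk cache
    # size validation above relies on the classic bit trick that n & (n - 1)
    # clears the lowest set bit, so the result is 0 exactly when n is a
    # power of two.  The helper name below is hypothetical.
    @staticmethod
    def _example_is_power_of_two(n):
        return n > 0 and (n & (n - 1)) == 0
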
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

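    # Illustrative sketch, not part of upstream revlog.py: _get_data() reads
    # a file in full, switching to mmap once the on-disk size crosses the
    # given threshold, and reports a missing file as b''.  The helper name
    # and threshold below are hypothetical.
    def _example_read_index_bytes(self, mmap_threshold=1024 * 1024):
        data = self._get_data(self._indexfile, mmap_threshold)
        return data  # b'' means the index file does not exist yet
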
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-for-byte copies of a
        repository.

        This streams data for all revisions that refer to a changelog revision
        up to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent
        an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self._datafp() as fp_d:
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

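    # Illustrative sketch, not part of upstream revlog.py: a stream-clone
    # consumer would drain each (filename, bytes_stream, stream_size) tuple
    # returned by get_streams() and copy it verbatim.  The helper name and
    # `write` callback below are hypothetical.
    def _example_copy_streams(self, max_linkrev, write):
        for filename, stream, size in self.get_streams(max_linkrev):
            for chunk in stream:
                write(filename, chunk)  # `size` could drive a progress bar
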
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

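    # Illustrative sketch, not part of upstream revlog.py: the parser
    # dispatch in _loadindex() amounts to a small version-to-callable table.
    # The helper name below is hypothetical.
    def _example_pick_parser(self):
        parser_by_version = {
            REVLOGV0: revlogv0.parse_index_v0,
            REVLOGV2: parse_index_v2,
            CHANGELOGV2: parse_index_cl_v2,
        }
        return parser_by_version.get(self._format_version, parse_index_v1)
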
    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog APIs"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

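    # Illustrative sketch, not part of upstream revlog.py: rev() and node()
    # are inverse lookups, and hasnode() above is simply rev() with the
    # lookup failure (a KeyError subclass) swallowed.  The helper name below
    # is hypothetical.
    def _example_roundtrip(self, node):
        return self.node(self.rev(node)) == node
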
-    def candelta(self, baserev, rev):
+    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

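    # Illustrative sketch, not part of upstream revlog.py: a delta-base
    # selection step would consult _candelta() before reusing a candidate
    # base, falling back to a full snapshot when rawtext-changing flags
    # (e.g. LFS) are involved.  The helper name below is hypothetical.
    def _example_pick_delta_base(self, rev, candidate_base):
        if candidate_base != nullrev and self._candelta(candidate_base, rev):
            return candidate_base
        return nullrev  # store a full snapshot instead
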
    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
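
    # Editorial note: the first index-tuple entry packs the 6-byte data
    # offset and the 2-byte flag field into one integer, which is why
    # ``start`` shifts and ``flags`` masks:
    #
    #     packed = (offset << 16) | flags
    #     offset = packed >> 16       # what start(rev) returns
    #     flags = packed & 0xFFFF     # what flags(rev) returns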

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank
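
    # Editorial example: on a linear history 0 <- 1 <- 2 stored in a
    # changelog-v2 revlog, fast_rank(2) would be 3, since ancestors(2)
    # including 2 itself is {0, 1, 2}; other revlog versions return None.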

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
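
    # Editorial sketch (not part of the upstream source): reconstructing a
    # revision replays its delta chain from the base snapshot forward:
    #
    #     chain, stopped = rlog._deltachain(rev)
    #     # chain == [base, ..., rev]; apply each rev's delta onto the
    #     # accumulated text, starting from the full text of ``base``
    #
    # With generaldelta each entry records its delta parent explicitly
    # (index[rev][3]); without it the delta parent is implicitly rev - 1,
    # hence the two branches in the loops above.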

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
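
    # Editorial example: on a linear history 0..4 with common=[node(1)] and
    # heads=[node(4)], ``has`` covers {nullrev, 0, 1} and the missing list
    # is [node(2), node(3), node(4)], already in topological order.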

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
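
    # Editorial example: on a chain A -> B -> C, nodesbetween(roots=[A],
    # heads=[C]) returns ([A, B, C], [A], [C]); nodes outside A::C are
    # excluded.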

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
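
    # Editorial example: with a history 0 <- 1 <- 2 plus 0 <- 3, every rev
    # named as a parent loses its head mark, so _headrevs() returns [2, 3].
    # The extra slot at the end of ``ishead`` absorbs nullrev (-1) parents.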

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
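
    # Editorial note: revision numbers are assigned in topological order, so
    # an ancestor always has a smaller rev than its descendants; ``a > b``
    # can therefore short-circuit to False, and the remaining case asks
    # whether root ``a`` is reachable from head ``b``.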

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))
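
    # Editorial sketch (hypothetical values): all of these resolve to the
    # same node, assuming rev 5 exists and its hex id starts with b'9f3c':
    #
    #     rlog.lookup(5)        # revision number
    #     rlog.lookup(b'5')     # str(revision number)
    #     rlog.lookup(b'9f3c')  # unambiguous hex node prefix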

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node
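
    # Editorial note: a node identifies content and ancestry together,
    # roughly sha1(sorted(p1, p2) + text), so cmp() can detect a content
    # change without reading the stored revision at all.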

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)
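
    # Editorial note: in an inline revlog, data chunks are interleaved with
    # index entries, so a data offset must be shifted by one entry_size per
    # index record written before it; rev's data is preceded by entries for
    # revs 0..rev, hence the ``(rev + 1) * entry_size`` adjustment above.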

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
1823
1823
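    # Illustrative sketch (not part of revlog.py): the three compression
    # modes stored in the index entry select how a chunk is decoded.  A
    # dispatch table makes the mapping in ``_chunk`` above explicit: PLAIN
    # uses the bytes as-is, DEFAULT uses the docket-level decompressor, and
    # INLINE reads a per-chunk header via ``decompress``:
    #
    #   decoders = {
    #       COMP_MODE_PLAIN: lambda rl, data: data,
    #       COMP_MODE_DEFAULT: lambda rl, data: rl._decompressor(data),
    #       COMP_MODE_INLINE: lambda rl, data: rl.decompress(data),
    #   }
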
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

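    # Illustrative sketch (not part of revlog.py): the speed-up over calling
    # ``_chunk()`` per revision comes from issuing one contiguous read per
    # slice and then taking zero-copy views into it.  With plain Python
    # types (``one_big_read`` and ``segment_offset`` are hypothetical names
    # for the segment returned above) the same idea looks like:
    #
    #   segment = memoryview(one_big_read)          # single I/O operation
    #   for rev in revs:
    #       off = start(rev) - segment_offset       # position inside segment
    #       chunk = segment[off:off + length(rev)]  # no per-rev file access
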
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

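    # Illustrative sketch (not part of revlog.py): with general delta the
    # stored base field is the delta parent itself; without it, the base
    # field only marks the start of a delta run and each revision deltas
    # against its immediate predecessor.  For example, if rev 7 stores
    # base == 5:
    #
    #   generaldelta revlog:  deltaparent(7) == 5
    #   legacy revlog:        deltaparent(7) == 6  (rev - 1)
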
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop the cache to save memory; the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

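    # Illustrative sketch (not part of revlog.py): reconstructing a revision
    # means starting from a full snapshot and folding every delta in the
    # chain into it.  Assuming a ``patch(text, delta)`` primitive in the
    # spirit of ``mdiff.patches``:
    #
    #   def reconstruct(snapshot, deltas):
    #       text = snapshot
    #       for delta in deltas:  # oldest delta first
    #           text = patch(text, delta)
    #       return text
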
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

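    # Illustrative sketch (not part of revlog.py): the default node hash is
    # the SHA-1 of both parent nodes (smaller one first) followed by the
    # text, in the spirit of ``storageutil.hashrevisionsha1``:
    #
    #   import hashlib
    #
    #   def node_hash(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
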
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

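    # Illustrative sketch (not part of revlog.py): for a filelog radix such
    # as ``data/foo.txt`` the property above yields ``data-s/foo.txt.i``,
    # while a store-root radix such as ``00changelog`` yields
    # ``00changelog.i.s``:
    #
    #   def split_index_path(radix):
    #       parts = radix.split(b'/')
    #       if len(parts) > 1:
    #           return b'/'.join(
    #               [parts[0] + b'-s'] + parts[1:-1] + [parts[-1] + b'.i']
    #           )
    #       return radix + b'.i.s'
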
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
                with self.__index_new_fp() as fp:
                    self._format_flags &= ~FLAG_INLINE_DATA
                    self._inline = False
                    for i in self:
                        e = self.index.entry_binary(i)
                        if i == 0 and self._docket is None:
                            header = self._format_flags | self._format_version
                            header = self.index.pack_header(header)
                            e = header + e
                        fp.write(e)
                    if self._docket is not None:
                        self._docket.index_end = fp.tell()

                    # If we don't use side-write, the temp file replaces the
                    # real index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional, reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

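    # Illustrative sketch (not part of revlog.py): the inline-to-split
    # conversion copies each revision's data segment into a fresh ``.d``
    # file, then rewrites the index without the inline flag; ``revs`` and
    # ``data_segment`` are hypothetical names standing in for the loop and
    # ``_getsegmentforrevs`` call above:
    #
    #   with open(datafile, 'wb') as dfh, open(indexfile, 'rb') as ifh:
    #       for r in revs:                       # revisions in order
    #           dfh.write(data_segment(r, ifh))  # data only, no index bytes
    #   # ...then write a new index whose header no longer advertises
    #   # FLAG_INLINE_DATA, so offsets now point into the ``.d`` file.
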
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references
                # to potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

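    # Illustrative sketch (not part of revlog.py): a minimal caller adds
    # revisions inside a transaction and lets the revlog compute the nodes;
    # ``rl`` and ``tr`` are a hypothetical open revlog and transaction:
    #
    #   node0 = rl.addrevision(b"first draft\n", tr, link=0,
    #                          p1=rl.nullid, p2=rl.nullid)
    #   node1 = rl.addrevision(b"second draft\n", tr, link=1,
    #                          p1=node0, p2=rl.nullid)
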
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

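    # Illustrative sketch (not part of revlog.py): ``compress`` above
    # returns a (header, data) pair.  An empty header means the payload
    # already carries its own marker (compressor output, or raw data
    # starting with '\0'); b'u' flags data stored uncompressed because
    # compression did not help and the first byte would be ambiguous.
    # With ``rl`` a hypothetical revlog instance:
    #
    #   header, payload = rl.compress(b"some chunk")
    #   stored = header + payload  # the bytes written to the revlog
    #   assert bytes(rl.decompress(stored)) == b"some chunk"
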
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way
            # we can easily detect empty sidedata, and it will be no
            # different from sidedata we add manually.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

2739 def _get_data_offset(self, prev):
2739 def _get_data_offset(self, prev):
2740 """Returns the current offset in the (in-transaction) data file.
2740 """Returns the current offset in the (in-transaction) data file.
2741 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2741 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2742 file to store that information: since sidedata can be rewritten to the
2742 file to store that information: since sidedata can be rewritten to the
2743 end of the data file within a transaction, you can have cases where, for
2743 end of the data file within a transaction, you can have cases where, for
2744 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2744 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2745 to `n - 1`'s sidedata being written after `n`'s data.
2745 to `n - 1`'s sidedata being written after `n`'s data.
2746
2746
2747 TODO cache this in a docket file before getting out of experimental."""
2747 TODO cache this in a docket file before getting out of experimental."""
2748 if self._docket is None:
2748 if self._docket is None:
2749 return self.end(prev)
2749 return self.end(prev)
2750 else:
2750 else:
2751 return self._docket.data_end
2751 return self._docket.data_end
2752
2752
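# --- hedged annotation (not part of the change): with a docket, sidedata
# rewrites can append to the data file out of revision order, so
# `self.end(prev)` no longer matches the true end of the file:
#
#     | rev n-1 data | rev n data | rev n-1 sidedata |
#                                 ^ self.end(n)      ^ docket.data_end
#
# hence the docket, not the index, must track the append offset.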
2753 def _writeentry(
2753 def _writeentry(
2754 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2754 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2755 ):
2755 ):
2756 # Files opened in a+ mode have inconsistent behavior on various
2756 # Files opened in a+ mode have inconsistent behavior on various
2757 # platforms. Windows requires that a file positioning call be made
2757 # platforms. Windows requires that a file positioning call be made
2758 # when the file handle transitions between reads and writes. See
2758 # when the file handle transitions between reads and writes. See
2759 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2759 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2760 # platforms, Python or the platform itself can be buggy. Some versions
2760 # platforms, Python or the platform itself can be buggy. Some versions
2761 # of Solaris have been observed to not append at the end of the file
2761 # of Solaris have been observed to not append at the end of the file
2762 # if the file was seeked to before the end. See issue4943 for more.
2762 # if the file was seeked to before the end. See issue4943 for more.
2763 #
2763 #
2764 # We work around this issue by inserting a seek() before writing.
2764 # We work around this issue by inserting a seek() before writing.
2765 # Note: This is likely not necessary on Python 3. However, because
2765 # Note: This is likely not necessary on Python 3. However, because
2766 # the file handle is reused for reads and may be seeked there, we need
2766 # the file handle is reused for reads and may be seeked there, we need
2767 # to be careful before changing this.
2767 # to be careful before changing this.
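# --- hedged illustration (annotation, a standalone demo of the pitfall
# described above, outside Mercurial):
#
#     with open(path, 'ab+') as f:
#         f.seek(0)
#         f.read(1)               # handle is now positioned mid-file
#         f.seek(0, os.SEEK_END)  # the workaround: reposition first
#         f.write(b'...')         # otherwise some platforms misplace it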
2768 if self._writinghandles is None:
2768 if self._writinghandles is None:
2769 msg = b'adding revision outside `revlog._writing` context'
2769 msg = b'adding revision outside `revlog._writing` context'
2770 raise error.ProgrammingError(msg)
2770 raise error.ProgrammingError(msg)
2771 ifh, dfh, sdfh = self._writinghandles
2771 ifh, dfh, sdfh = self._writinghandles
2772 if self._docket is None:
2772 if self._docket is None:
2773 ifh.seek(0, os.SEEK_END)
2773 ifh.seek(0, os.SEEK_END)
2774 else:
2774 else:
2775 ifh.seek(self._docket.index_end, os.SEEK_SET)
2775 ifh.seek(self._docket.index_end, os.SEEK_SET)
2776 if dfh:
2776 if dfh:
2777 if self._docket is None:
2777 if self._docket is None:
2778 dfh.seek(0, os.SEEK_END)
2778 dfh.seek(0, os.SEEK_END)
2779 else:
2779 else:
2780 dfh.seek(self._docket.data_end, os.SEEK_SET)
2780 dfh.seek(self._docket.data_end, os.SEEK_SET)
2781 if sdfh:
2781 if sdfh:
2782 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2782 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2783
2783
2784 curr = len(self) - 1
2784 curr = len(self) - 1
2785 if not self._inline:
2785 if not self._inline:
2786 transaction.add(self._datafile, offset)
2786 transaction.add(self._datafile, offset)
2787 if self._sidedatafile:
2787 if self._sidedatafile:
2788 transaction.add(self._sidedatafile, sidedata_offset)
2788 transaction.add(self._sidedatafile, sidedata_offset)
2789 transaction.add(self._indexfile, curr * len(entry))
2789 transaction.add(self._indexfile, curr * len(entry))
2790 if data[0]:
2790 if data[0]:
2791 dfh.write(data[0])
2791 dfh.write(data[0])
2792 dfh.write(data[1])
2792 dfh.write(data[1])
2793 if sidedata:
2793 if sidedata:
2794 sdfh.write(sidedata)
2794 sdfh.write(sidedata)
2795 ifh.write(entry)
2795 ifh.write(entry)
2796 else:
2796 else:
2797 offset += curr * self.index.entry_size
2797 offset += curr * self.index.entry_size
2798 transaction.add(self._indexfile, offset)
2798 transaction.add(self._indexfile, offset)
2799 ifh.write(entry)
2799 ifh.write(entry)
2800 ifh.write(data[0])
2800 ifh.write(data[0])
2801 ifh.write(data[1])
2801 ifh.write(data[1])
2802 assert not sidedata
2802 assert not sidedata
2803 self._enforceinlinesize(transaction)
2803 self._enforceinlinesize(transaction)
2804 if self._docket is not None:
2804 if self._docket is not None:
2805 # revlog-v2 always has 3 writing handles, help Pytype
2805 # revlog-v2 always has 3 writing handles, help Pytype
2806 wh1 = self._writinghandles[0]
2806 wh1 = self._writinghandles[0]
2807 wh2 = self._writinghandles[1]
2807 wh2 = self._writinghandles[1]
2808 wh3 = self._writinghandles[2]
2808 wh3 = self._writinghandles[2]
2809 assert wh1 is not None
2809 assert wh1 is not None
2810 assert wh2 is not None
2810 assert wh2 is not None
2811 assert wh3 is not None
2811 assert wh3 is not None
2812 self._docket.index_end = wh1.tell()
2812 self._docket.index_end = wh1.tell()
2813 self._docket.data_end = wh2.tell()
2813 self._docket.data_end = wh2.tell()
2814 self._docket.sidedata_end = wh3.tell()
2814 self._docket.sidedata_end = wh3.tell()
2815
2815
2816 nodemaputil.setup_persistent_nodemap(transaction, self)
2816 nodemaputil.setup_persistent_nodemap(transaction, self)
2817
2817
2818 def addgroup(
2818 def addgroup(
2819 self,
2819 self,
2820 deltas,
2820 deltas,
2821 linkmapper,
2821 linkmapper,
2822 transaction,
2822 transaction,
2823 alwayscache=False,
2823 alwayscache=False,
2824 addrevisioncb=None,
2824 addrevisioncb=None,
2825 duplicaterevisioncb=None,
2825 duplicaterevisioncb=None,
2826 debug_info=None,
2826 debug_info=None,
2827 delta_base_reuse_policy=None,
2827 delta_base_reuse_policy=None,
2828 ):
2828 ):
2829 """
2829 """
2830 add a delta group
2830 add a delta group
2831
2831
2832 Given a set of deltas, add them to the revision log. The
2832 Given a set of deltas, add them to the revision log. The
2833 first delta is against its parent, which should be in our
2833 first delta is against its parent, which should be in our
2834 log; the rest are against the previous delta.
2834 log; the rest are against the previous delta.
2835
2835
2836 If ``addrevisioncb`` is defined, it will be called with arguments of
2836 If ``addrevisioncb`` is defined, it will be called with arguments of
2837 this revlog and the node that was added.
2837 this revlog and the node that was added.
2838 """
2838 """
2839
2839
2840 if self._adding_group:
2840 if self._adding_group:
2841 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2841 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2842
2842
2843 # read the default delta-base reuse policy from revlog config if the
2843 # read the default delta-base reuse policy from revlog config if the
2844 # group did not specify one.
2844 # group did not specify one.
2845 if delta_base_reuse_policy is None:
2845 if delta_base_reuse_policy is None:
2846 if self._generaldelta and self._lazydeltabase:
2846 if self._generaldelta and self._lazydeltabase:
2847 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2847 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2848 else:
2848 else:
2849 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2849 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2850
2850
2851 self._adding_group = True
2851 self._adding_group = True
2852 empty = True
2852 empty = True
2853 try:
2853 try:
2854 with self._writing(transaction):
2854 with self._writing(transaction):
2855 write_debug = None
2855 write_debug = None
2856 if self._debug_delta:
2856 if self._debug_delta:
2857 write_debug = transaction._report
2857 write_debug = transaction._report
2858 deltacomputer = deltautil.deltacomputer(
2858 deltacomputer = deltautil.deltacomputer(
2859 self,
2859 self,
2860 write_debug=write_debug,
2860 write_debug=write_debug,
2861 debug_info=debug_info,
2861 debug_info=debug_info,
2862 )
2862 )
2863 # loop through our set of deltas
2863 # loop through our set of deltas
2864 for data in deltas:
2864 for data in deltas:
2865 (
2865 (
2866 node,
2866 node,
2867 p1,
2867 p1,
2868 p2,
2868 p2,
2869 linknode,
2869 linknode,
2870 deltabase,
2870 deltabase,
2871 delta,
2871 delta,
2872 flags,
2872 flags,
2873 sidedata,
2873 sidedata,
2874 ) = data
2874 ) = data
2875 link = linkmapper(linknode)
2875 link = linkmapper(linknode)
2876 flags = flags or REVIDX_DEFAULT_FLAGS
2876 flags = flags or REVIDX_DEFAULT_FLAGS
2877
2877
2878 rev = self.index.get_rev(node)
2878 rev = self.index.get_rev(node)
2879 if rev is not None:
2879 if rev is not None:
2880 # this can happen if two branches make the same change
2880 # this can happen if two branches make the same change
2881 self._nodeduplicatecallback(transaction, rev)
2881 self._nodeduplicatecallback(transaction, rev)
2882 if duplicaterevisioncb:
2882 if duplicaterevisioncb:
2883 duplicaterevisioncb(self, rev)
2883 duplicaterevisioncb(self, rev)
2884 empty = False
2884 empty = False
2885 continue
2885 continue
2886
2886
2887 for p in (p1, p2):
2887 for p in (p1, p2):
2888 if not self.index.has_node(p):
2888 if not self.index.has_node(p):
2889 raise error.LookupError(
2889 raise error.LookupError(
2890 p, self.radix, _(b'unknown parent')
2890 p, self.radix, _(b'unknown parent')
2891 )
2891 )
2892
2892
2893 if not self.index.has_node(deltabase):
2893 if not self.index.has_node(deltabase):
2894 raise error.LookupError(
2894 raise error.LookupError(
2895 deltabase, self.display_id, _(b'unknown delta base')
2895 deltabase, self.display_id, _(b'unknown delta base')
2896 )
2896 )
2897
2897
2898 baserev = self.rev(deltabase)
2898 baserev = self.rev(deltabase)
2899
2899
2900 if baserev != nullrev and self.iscensored(baserev):
2900 if baserev != nullrev and self.iscensored(baserev):
2901 # if base is censored, delta must be full replacement in a
2901 # if base is censored, delta must be full replacement in a
2902 # single patch operation
2902 # single patch operation
2903 hlen = struct.calcsize(b">lll")
2903 hlen = struct.calcsize(b">lll")
2904 oldlen = self.rawsize(baserev)
2904 oldlen = self.rawsize(baserev)
2905 newlen = len(delta) - hlen
2905 newlen = len(delta) - hlen
2906 if delta[:hlen] != mdiff.replacediffheader(
2906 if delta[:hlen] != mdiff.replacediffheader(
2907 oldlen, newlen
2907 oldlen, newlen
2908 ):
2908 ):
2909 raise error.CensoredBaseError(
2909 raise error.CensoredBaseError(
2910 self.display_id, self.node(baserev)
2910 self.display_id, self.node(baserev)
2911 )
2911 )
2912
2912
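# --- hedged note (annotation): assuming mdiff's standard bdiff chunk
# encoding, a "full replacement" delta is a single >lll patch header
# covering the whole old text, followed by the complete new text:
#
#     struct.pack(b">lll", 0, oldlen, newlen) + newtext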
2913 if not flags and self._peek_iscensored(baserev, delta):
2913 if not flags and self._peek_iscensored(baserev, delta):
2914 flags |= REVIDX_ISCENSORED
2914 flags |= REVIDX_ISCENSORED
2915
2915
2916 # We assume consumers of addrevisioncb will want to retrieve
2916 # We assume consumers of addrevisioncb will want to retrieve
2917 # the added revision, which will require a call to
2917 # the added revision, which will require a call to
2918 # revision(). revision() will fast path if there is a cache
2918 # revision(). revision() will fast path if there is a cache
2919 # hit. So, we tell _addrevision() to always cache in this case.
2919 # hit. So, we tell _addrevision() to always cache in this case.
2920 # We're only using addgroup() in the context of changegroup
2920 # We're only using addgroup() in the context of changegroup
2921 # generation so the revision data can always be handled as raw
2921 # generation so the revision data can always be handled as raw
2922 # by the flagprocessor.
2922 # by the flagprocessor.
2923 rev = self._addrevision(
2923 rev = self._addrevision(
2924 node,
2924 node,
2925 None,
2925 None,
2926 transaction,
2926 transaction,
2927 link,
2927 link,
2928 p1,
2928 p1,
2929 p2,
2929 p2,
2930 flags,
2930 flags,
2931 (baserev, delta, delta_base_reuse_policy),
2931 (baserev, delta, delta_base_reuse_policy),
2932 alwayscache=alwayscache,
2932 alwayscache=alwayscache,
2933 deltacomputer=deltacomputer,
2933 deltacomputer=deltacomputer,
2934 sidedata=sidedata,
2934 sidedata=sidedata,
2935 )
2935 )
2936
2936
2937 if addrevisioncb:
2937 if addrevisioncb:
2938 addrevisioncb(self, rev)
2938 addrevisioncb(self, rev)
2939 empty = False
2939 empty = False
2940 finally:
2940 finally:
2941 self._adding_group = False
2941 self._adding_group = False
2942 return not empty
2942 return not empty
2943
2943
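# --- hedged usage sketch (annotation; `rlog`, `tr` and the node values are
# hypothetical): each element of `deltas` is the 8-tuple unpacked above.
#
#     deltas = [(node, p1, p2, linknode, deltabase, delta, 0, {})]
#     added = rlog.addgroup(deltas, linkmapper, tr)
#     # True when at least one revision was actually appended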
2944 def iscensored(self, rev):
2944 def iscensored(self, rev):
2945 """Check if a file revision is censored."""
2945 """Check if a file revision is censored."""
2946 if not self._censorable:
2946 if not self._censorable:
2947 return False
2947 return False
2948
2948
2949 return self.flags(rev) & REVIDX_ISCENSORED
2949 return self.flags(rev) & REVIDX_ISCENSORED
2950
2950
2951 def _peek_iscensored(self, baserev, delta):
2951 def _peek_iscensored(self, baserev, delta):
2952 """Quickly check if a delta produces a censored revision."""
2952 """Quickly check if a delta produces a censored revision."""
2953 if not self._censorable:
2953 if not self._censorable:
2954 return False
2954 return False
2955
2955
2956 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2956 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2957
2957
2958 def getstrippoint(self, minlink):
2958 def getstrippoint(self, minlink):
2959 """find the minimum rev that must be stripped to strip the linkrev
2959 """find the minimum rev that must be stripped to strip the linkrev
2960
2960
2961 Returns a tuple containing the minimum rev and a set of all revs that
2961 Returns a tuple containing the minimum rev and a set of all revs that
2962 have linkrevs that will be broken by this strip.
2962 have linkrevs that will be broken by this strip.
2963 """
2963 """
2964 return storageutil.resolvestripinfo(
2964 return storageutil.resolvestripinfo(
2965 minlink,
2965 minlink,
2966 len(self) - 1,
2966 len(self) - 1,
2967 self.headrevs(),
2967 self.headrevs(),
2968 self.linkrev,
2968 self.linkrev,
2969 self.parentrevs,
2969 self.parentrevs,
2970 )
2970 )
2971
2971
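# --- hedged usage sketch (annotation; `rlog` and `minlink` hypothetical):
#
#     rev, broken = rlog.getstrippoint(minlink)
#     # `rev` is the first revision to truncate; `broken` holds revs whose
#     # linkrevs this strip would invalidate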
2972 def strip(self, minlink, transaction):
2972 def strip(self, minlink, transaction):
2973 """truncate the revlog on the first revision with a linkrev >= minlink
2973 """truncate the revlog on the first revision with a linkrev >= minlink
2974
2974
2975 This function is called when we're stripping revision minlink and
2975 This function is called when we're stripping revision minlink and
2976 its descendants from the repository.
2976 its descendants from the repository.
2977
2977
2978 We have to remove all revisions with linkrev >= minlink, because
2978 We have to remove all revisions with linkrev >= minlink, because
2979 the equivalent changelog revisions will be renumbered after the
2979 the equivalent changelog revisions will be renumbered after the
2980 strip.
2980 strip.
2981
2981
2982 So we truncate the revlog on the first of these revisions, and
2982 So we truncate the revlog on the first of these revisions, and
2983 trust that the caller has saved the revisions that shouldn't be
2983 trust that the caller has saved the revisions that shouldn't be
2984 removed and that it'll re-add them after this truncation.
2984 removed and that it'll re-add them after this truncation.
2985 """
2985 """
2986 if len(self) == 0:
2986 if len(self) == 0:
2987 return
2987 return
2988
2988
2989 rev, _ = self.getstrippoint(minlink)
2989 rev, _ = self.getstrippoint(minlink)
2990 if rev == len(self):
2990 if rev == len(self):
2991 return
2991 return
2992
2992
2993 # first truncate the files on disk
2993 # first truncate the files on disk
2994 data_end = self.start(rev)
2994 data_end = self.start(rev)
2995 if not self._inline:
2995 if not self._inline:
2996 transaction.add(self._datafile, data_end)
2996 transaction.add(self._datafile, data_end)
2997 end = rev * self.index.entry_size
2997 end = rev * self.index.entry_size
2998 else:
2998 else:
2999 end = data_end + (rev * self.index.entry_size)
2999 end = data_end + (rev * self.index.entry_size)
3000
3000
3001 if self._sidedatafile:
3001 if self._sidedatafile:
3002 sidedata_end = self.sidedata_cut_off(rev)
3002 sidedata_end = self.sidedata_cut_off(rev)
3003 transaction.add(self._sidedatafile, sidedata_end)
3003 transaction.add(self._sidedatafile, sidedata_end)
3004
3004
3005 transaction.add(self._indexfile, end)
3005 transaction.add(self._indexfile, end)
3006 if self._docket is not None:
3006 if self._docket is not None:
3007 # XXX we could leverage the docket while stripping. However it is
3007 # XXX we could leverage the docket while stripping. However it is
3008 # not powerful enough at the time of this comment
3008 # not powerful enough at the time of this comment
3009 self._docket.index_end = end
3009 self._docket.index_end = end
3010 self._docket.data_end = data_end
3010 self._docket.data_end = data_end
3011 self._docket.sidedata_end = sidedata_end
3011 self._docket.sidedata_end = sidedata_end
3012 self._docket.write(transaction, stripping=True)
3012 self._docket.write(transaction, stripping=True)
3013
3013
3014 # then reset internal state in memory to forget those revisions
3014 # then reset internal state in memory to forget those revisions
3015 self._revisioncache = None
3015 self._revisioncache = None
3016 self._chaininfocache = util.lrucachedict(500)
3016 self._chaininfocache = util.lrucachedict(500)
3017 self._segmentfile.clear_cache()
3017 self._segmentfile.clear_cache()
3018 self._segmentfile_sidedata.clear_cache()
3018 self._segmentfile_sidedata.clear_cache()
3019
3019
3020 del self.index[rev:-1]
3020 del self.index[rev:-1]
3021
3021
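# --- hedged annotation: the truncation offsets computed above, in short.
# A split revlog truncates index and data separately; an inline revlog
# interleaves both in the index file:
#
#     non-inline: index end = rev * entry_size,  data end = start(rev)
#     inline:     index end = start(rev) + rev * entry_size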
3022 def checksize(self):
3022 def checksize(self):
3023 """Check size of index and data files
3023 """Check size of index and data files
3024
3024
3025 return a (dd, di) tuple.
3025 return a (dd, di) tuple.
3026 - dd: extra bytes for the "data" file
3026 - dd: extra bytes for the "data" file
3027 - di: extra bytes for the "index" file
3027 - di: extra bytes for the "index" file
3028
3028
3029 A healthy revlog will return (0, 0).
3029 A healthy revlog will return (0, 0).
3030 """
3030 """
3031 expected = 0
3031 expected = 0
3032 if len(self):
3032 if len(self):
3033 expected = max(0, self.end(len(self) - 1))
3033 expected = max(0, self.end(len(self) - 1))
3034
3034
3035 try:
3035 try:
3036 with self._datafp() as f:
3036 with self._datafp() as f:
3037 f.seek(0, io.SEEK_END)
3037 f.seek(0, io.SEEK_END)
3038 actual = f.tell()
3038 actual = f.tell()
3039 dd = actual - expected
3039 dd = actual - expected
3040 except FileNotFoundError:
3040 except FileNotFoundError:
3041 dd = 0
3041 dd = 0
3042
3042
3043 try:
3043 try:
3044 f = self.opener(self._indexfile)
3044 f = self.opener(self._indexfile)
3045 f.seek(0, io.SEEK_END)
3045 f.seek(0, io.SEEK_END)
3046 actual = f.tell()
3046 actual = f.tell()
3047 f.close()
3047 f.close()
3048 s = self.index.entry_size
3048 s = self.index.entry_size
3049 i = max(0, actual // s)
3049 i = max(0, actual // s)
3050 di = actual - (i * s)
3050 di = actual - (i * s)
3051 if self._inline:
3051 if self._inline:
3052 databytes = 0
3052 databytes = 0
3053 for r in self:
3053 for r in self:
3054 databytes += max(0, self.length(r))
3054 databytes += max(0, self.length(r))
3055 dd = 0
3055 dd = 0
3056 di = actual - len(self) * s - databytes
3056 di = actual - len(self) * s - databytes
3057 except FileNotFoundError:
3057 except FileNotFoundError:
3058 di = 0
3058 di = 0
3059
3059
3060 return (dd, di)
3060 return (dd, di)
3061
3061
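# --- hedged usage sketch (annotation; `rlog` hypothetical):
#
#     dd, di = rlog.checksize()
#     assert (dd, di) == (0, 0)  # a healthy revlog has no stray bytes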
3062 def files(self):
3062 def files(self):
3063 res = [self._indexfile]
3063 res = [self._indexfile]
3064 if self._docket_file is None:
3064 if self._docket_file is None:
3065 if not self._inline:
3065 if not self._inline:
3066 res.append(self._datafile)
3066 res.append(self._datafile)
3067 else:
3067 else:
3068 res.append(self._docket_file)
3068 res.append(self._docket_file)
3069 res.extend(self._docket.old_index_filepaths(include_empty=False))
3069 res.extend(self._docket.old_index_filepaths(include_empty=False))
3070 if self._docket.data_end:
3070 if self._docket.data_end:
3071 res.append(self._datafile)
3071 res.append(self._datafile)
3072 res.extend(self._docket.old_data_filepaths(include_empty=False))
3072 res.extend(self._docket.old_data_filepaths(include_empty=False))
3073 if self._docket.sidedata_end:
3073 if self._docket.sidedata_end:
3074 res.append(self._sidedatafile)
3074 res.append(self._sidedatafile)
3075 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3075 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3076 return res
3076 return res
3077
3077
3078 def emitrevisions(
3078 def emitrevisions(
3079 self,
3079 self,
3080 nodes,
3080 nodes,
3081 nodesorder=None,
3081 nodesorder=None,
3082 revisiondata=False,
3082 revisiondata=False,
3083 assumehaveparentrevisions=False,
3083 assumehaveparentrevisions=False,
3084 deltamode=repository.CG_DELTAMODE_STD,
3084 deltamode=repository.CG_DELTAMODE_STD,
3085 sidedata_helpers=None,
3085 sidedata_helpers=None,
3086 debug_info=None,
3086 debug_info=None,
3087 ):
3087 ):
3088 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3088 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3089 raise error.ProgrammingError(
3089 raise error.ProgrammingError(
3090 b'unhandled value for nodesorder: %s' % nodesorder
3090 b'unhandled value for nodesorder: %s' % nodesorder
3091 )
3091 )
3092
3092
3093 if nodesorder is None and not self._generaldelta:
3093 if nodesorder is None and not self._generaldelta:
3094 nodesorder = b'storage'
3094 nodesorder = b'storage'
3095
3095
3096 if (
3096 if (
3097 not self._storedeltachains
3097 not self._storedeltachains
3098 and deltamode != repository.CG_DELTAMODE_PREV
3098 and deltamode != repository.CG_DELTAMODE_PREV
3099 ):
3099 ):
3100 deltamode = repository.CG_DELTAMODE_FULL
3100 deltamode = repository.CG_DELTAMODE_FULL
3101
3101
3102 return storageutil.emitrevisions(
3102 return storageutil.emitrevisions(
3103 self,
3103 self,
3104 nodes,
3104 nodes,
3105 nodesorder,
3105 nodesorder,
3106 revlogrevisiondelta,
3106 revlogrevisiondelta,
3107 deltaparentfn=self.deltaparent,
3107 deltaparentfn=self.deltaparent,
3108 candeltafn=self.candelta,
3108 candeltafn=self._candelta,
3109 rawsizefn=self.rawsize,
3109 rawsizefn=self.rawsize,
3110 revdifffn=self.revdiff,
3110 revdifffn=self.revdiff,
3111 flagsfn=self.flags,
3111 flagsfn=self.flags,
3112 deltamode=deltamode,
3112 deltamode=deltamode,
3113 revisiondata=revisiondata,
3113 revisiondata=revisiondata,
3114 assumehaveparentrevisions=assumehaveparentrevisions,
3114 assumehaveparentrevisions=assumehaveparentrevisions,
3115 sidedata_helpers=sidedata_helpers,
3115 sidedata_helpers=sidedata_helpers,
3116 debug_info=debug_info,
3116 debug_info=debug_info,
3117 )
3117 )
3118
3118
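# --- hedged usage sketch (annotation; `rlog` and `nodes` hypothetical):
#
#     for rd in rlog.emitrevisions(nodes, revisiondata=True):
#         consume(rd.node, rd.revision or rd.delta)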
3119 DELTAREUSEALWAYS = b'always'
3119 DELTAREUSEALWAYS = b'always'
3120 DELTAREUSESAMEREVS = b'samerevs'
3120 DELTAREUSESAMEREVS = b'samerevs'
3121 DELTAREUSENEVER = b'never'
3121 DELTAREUSENEVER = b'never'
3122
3122
3123 DELTAREUSEFULLADD = b'fulladd'
3123 DELTAREUSEFULLADD = b'fulladd'
3124
3124
3125 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3125 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3126
3126
3127 def clone(
3127 def clone(
3128 self,
3128 self,
3129 tr,
3129 tr,
3130 destrevlog,
3130 destrevlog,
3131 addrevisioncb=None,
3131 addrevisioncb=None,
3132 deltareuse=DELTAREUSESAMEREVS,
3132 deltareuse=DELTAREUSESAMEREVS,
3133 forcedeltabothparents=None,
3133 forcedeltabothparents=None,
3134 sidedata_helpers=None,
3134 sidedata_helpers=None,
3135 ):
3135 ):
3136 """Copy this revlog to another, possibly with format changes.
3136 """Copy this revlog to another, possibly with format changes.
3137
3137
3138 The destination revlog will contain the same revisions and nodes.
3138 The destination revlog will contain the same revisions and nodes.
3139 However, it may not be bit-for-bit identical due to e.g. delta encoding
3139 However, it may not be bit-for-bit identical due to e.g. delta encoding
3140 differences.
3140 differences.
3141
3141
3142 The ``deltareuse`` argument controls how deltas from the existing revlog
3142 The ``deltareuse`` argument controls how deltas from the existing revlog
3143 are preserved in the destination revlog. The argument can have the
3143 are preserved in the destination revlog. The argument can have the
3144 following values:
3144 following values:
3145
3145
3146 DELTAREUSEALWAYS
3146 DELTAREUSEALWAYS
3147 Deltas will always be reused (if possible), even if the destination
3147 Deltas will always be reused (if possible), even if the destination
3148 revlog would not select the same revisions for the delta. This is the
3148 revlog would not select the same revisions for the delta. This is the
3149 fastest mode of operation.
3149 fastest mode of operation.
3150 DELTAREUSESAMEREVS
3150 DELTAREUSESAMEREVS
3151 Deltas will be reused if the destination revlog would pick the same
3151 Deltas will be reused if the destination revlog would pick the same
3152 revisions for the delta. This mode strikes a balance between speed
3152 revisions for the delta. This mode strikes a balance between speed
3153 and optimization.
3153 and optimization.
3154 DELTAREUSENEVER
3154 DELTAREUSENEVER
3155 Deltas will never be reused. This is the slowest mode of execution.
3155 Deltas will never be reused. This is the slowest mode of execution.
3156 This mode can be used to recompute deltas (e.g. if the diff/delta
3156 This mode can be used to recompute deltas (e.g. if the diff/delta
3157 algorithm changes).
3157 algorithm changes).
3158 DELTAREUSEFULLADD
3158 DELTAREUSEFULLADD
3159 Revisions will be re-added as if they were new content. This is
3159 Revisions will be re-added as if they were new content. This is
3160 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3160 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3161 e.g. large file detection and handling.
3161 e.g. large file detection and handling.
3162
3162
3163 Delta computation can be slow, so the choice of delta reuse policy can
3163 Delta computation can be slow, so the choice of delta reuse policy can
3164 significantly affect run time.
3164 significantly affect run time.
3165
3165
3166 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3166 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3167 two extremes. Deltas will be reused if they are appropriate. But if the
3167 two extremes. Deltas will be reused if they are appropriate. But if the
3168 delta could choose a better revision, it will do so. This means if you
3168 delta could choose a better revision, it will do so. This means if you
3169 are converting a non-generaldelta revlog to a generaldelta revlog,
3169 are converting a non-generaldelta revlog to a generaldelta revlog,
3170 deltas will be recomputed if the delta's parent isn't a parent of the
3170 deltas will be recomputed if the delta's parent isn't a parent of the
3171 revision.
3171 revision.
3172
3172
3173 In addition to the delta policy, the ``forcedeltabothparents``
3173 In addition to the delta policy, the ``forcedeltabothparents``
3174 argument controls whether to force computing deltas against both parents
3174 argument controls whether to force computing deltas against both parents
3175 for merges. When unset, the destination revlog's current setting is used.
3175 for merges. When unset, the destination revlog's current setting is used.
3176
3176
3177 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3177 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3178 `sidedata_helpers`.
3178 `sidedata_helpers`.
3179 """
3179 """
3180 if deltareuse not in self.DELTAREUSEALL:
3180 if deltareuse not in self.DELTAREUSEALL:
3181 raise ValueError(
3181 raise ValueError(
3182 _(b'value for deltareuse invalid: %s') % deltareuse
3182 _(b'value for deltareuse invalid: %s') % deltareuse
3183 )
3183 )
3184
3184
3185 if len(destrevlog):
3185 if len(destrevlog):
3186 raise ValueError(_(b'destination revlog is not empty'))
3186 raise ValueError(_(b'destination revlog is not empty'))
3187
3187
3188 if getattr(self, 'filteredrevs', None):
3188 if getattr(self, 'filteredrevs', None):
3189 raise ValueError(_(b'source revlog has filtered revisions'))
3189 raise ValueError(_(b'source revlog has filtered revisions'))
3190 if getattr(destrevlog, 'filteredrevs', None):
3190 if getattr(destrevlog, 'filteredrevs', None):
3191 raise ValueError(_(b'destination revlog has filtered revisions'))
3191 raise ValueError(_(b'destination revlog has filtered revisions'))
3192
3192
3193 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3193 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3194 # if possible.
3194 # if possible.
3195 oldlazydelta = destrevlog._lazydelta
3195 oldlazydelta = destrevlog._lazydelta
3196 oldlazydeltabase = destrevlog._lazydeltabase
3196 oldlazydeltabase = destrevlog._lazydeltabase
3197 oldamd = destrevlog._deltabothparents
3197 oldamd = destrevlog._deltabothparents
3198
3198
3199 try:
3199 try:
3200 if deltareuse == self.DELTAREUSEALWAYS:
3200 if deltareuse == self.DELTAREUSEALWAYS:
3201 destrevlog._lazydeltabase = True
3201 destrevlog._lazydeltabase = True
3202 destrevlog._lazydelta = True
3202 destrevlog._lazydelta = True
3203 elif deltareuse == self.DELTAREUSESAMEREVS:
3203 elif deltareuse == self.DELTAREUSESAMEREVS:
3204 destrevlog._lazydeltabase = False
3204 destrevlog._lazydeltabase = False
3205 destrevlog._lazydelta = True
3205 destrevlog._lazydelta = True
3206 elif deltareuse == self.DELTAREUSENEVER:
3206 elif deltareuse == self.DELTAREUSENEVER:
3207 destrevlog._lazydeltabase = False
3207 destrevlog._lazydeltabase = False
3208 destrevlog._lazydelta = False
3208 destrevlog._lazydelta = False
3209
3209
3210 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3210 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3211
3211
3212 self._clone(
3212 self._clone(
3213 tr,
3213 tr,
3214 destrevlog,
3214 destrevlog,
3215 addrevisioncb,
3215 addrevisioncb,
3216 deltareuse,
3216 deltareuse,
3217 forcedeltabothparents,
3217 forcedeltabothparents,
3218 sidedata_helpers,
3218 sidedata_helpers,
3219 )
3219 )
3220
3220
3221 finally:
3221 finally:
3222 destrevlog._lazydelta = oldlazydelta
3222 destrevlog._lazydelta = oldlazydelta
3223 destrevlog._lazydeltabase = oldlazydeltabase
3223 destrevlog._lazydeltabase = oldlazydeltabase
3224 destrevlog._deltabothparents = oldamd
3224 destrevlog._deltabothparents = oldamd
3225
3225
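# --- hedged usage sketch (annotation; `src`, `dst`, `tr` hypothetical):
#
#     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
#     # recomputes every delta while copying: the slowest but most
#     # thorough of the policies documented above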
3226 def _clone(
3226 def _clone(
3227 self,
3227 self,
3228 tr,
3228 tr,
3229 destrevlog,
3229 destrevlog,
3230 addrevisioncb,
3230 addrevisioncb,
3231 deltareuse,
3231 deltareuse,
3232 forcedeltabothparents,
3232 forcedeltabothparents,
3233 sidedata_helpers,
3233 sidedata_helpers,
3234 ):
3234 ):
3235 """perform the core duty of `revlog.clone` after parameter processing"""
3235 """perform the core duty of `revlog.clone` after parameter processing"""
3236 write_debug = None
3236 write_debug = None
3237 if self._debug_delta:
3237 if self._debug_delta:
3238 write_debug = tr._report
3238 write_debug = tr._report
3239 deltacomputer = deltautil.deltacomputer(
3239 deltacomputer = deltautil.deltacomputer(
3240 destrevlog,
3240 destrevlog,
3241 write_debug=write_debug,
3241 write_debug=write_debug,
3242 )
3242 )
3243 index = self.index
3243 index = self.index
3244 for rev in self:
3244 for rev in self:
3245 entry = index[rev]
3245 entry = index[rev]
3246
3246
3247 # Some classes override linkrev to take filtered revs into
3247 # Some classes override linkrev to take filtered revs into
3248 # account. Use raw entry from index.
3248 # account. Use raw entry from index.
3249 flags = entry[0] & 0xFFFF
3249 flags = entry[0] & 0xFFFF
3250 linkrev = entry[4]
3250 linkrev = entry[4]
3251 p1 = index[entry[5]][7]
3251 p1 = index[entry[5]][7]
3252 p2 = index[entry[6]][7]
3252 p2 = index[entry[6]][7]
3253 node = entry[7]
3253 node = entry[7]
3254
3254
3255 # (Possibly) reuse the delta from the revlog if allowed and
3255 # (Possibly) reuse the delta from the revlog if allowed and
3256 # the revlog chunk is a delta.
3256 # the revlog chunk is a delta.
3257 cachedelta = None
3257 cachedelta = None
3258 rawtext = None
3258 rawtext = None
3259 if deltareuse == self.DELTAREUSEFULLADD:
3259 if deltareuse == self.DELTAREUSEFULLADD:
3260 text = self._revisiondata(rev)
3260 text = self._revisiondata(rev)
3261 sidedata = self.sidedata(rev)
3261 sidedata = self.sidedata(rev)
3262
3262
3263 if sidedata_helpers is not None:
3263 if sidedata_helpers is not None:
3264 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3264 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3265 self, sidedata_helpers, sidedata, rev
3265 self, sidedata_helpers, sidedata, rev
3266 )
3266 )
3267 flags = flags | new_flags[0] & ~new_flags[1]
3267 flags = flags | new_flags[0] & ~new_flags[1]
3268
3268
3269 destrevlog.addrevision(
3269 destrevlog.addrevision(
3270 text,
3270 text,
3271 tr,
3271 tr,
3272 linkrev,
3272 linkrev,
3273 p1,
3273 p1,
3274 p2,
3274 p2,
3275 cachedelta=cachedelta,
3275 cachedelta=cachedelta,
3276 node=node,
3276 node=node,
3277 flags=flags,
3277 flags=flags,
3278 deltacomputer=deltacomputer,
3278 deltacomputer=deltacomputer,
3279 sidedata=sidedata,
3279 sidedata=sidedata,
3280 )
3280 )
3281 else:
3281 else:
3282 if destrevlog._lazydelta:
3282 if destrevlog._lazydelta:
3283 dp = self.deltaparent(rev)
3283 dp = self.deltaparent(rev)
3284 if dp != nullrev:
3284 if dp != nullrev:
3285 cachedelta = (dp, bytes(self._chunk(rev)))
3285 cachedelta = (dp, bytes(self._chunk(rev)))
3286
3286
3287 sidedata = None
3287 sidedata = None
3288 if not cachedelta:
3288 if not cachedelta:
3289 rawtext = self._revisiondata(rev)
3289 rawtext = self._revisiondata(rev)
3290 sidedata = self.sidedata(rev)
3290 sidedata = self.sidedata(rev)
3291 if sidedata is None:
3291 if sidedata is None:
3292 sidedata = self.sidedata(rev)
3292 sidedata = self.sidedata(rev)
3293
3293
3294 if sidedata_helpers is not None:
3294 if sidedata_helpers is not None:
3295 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3295 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3296 self, sidedata_helpers, sidedata, rev
3296 self, sidedata_helpers, sidedata, rev
3297 )
3297 )
3298 flags = flags | new_flags[0] & ~new_flags[1]
3298 flags = flags | new_flags[0] & ~new_flags[1]
3299
3299
3300 with destrevlog._writing(tr):
3300 with destrevlog._writing(tr):
3301 destrevlog._addrevision(
3301 destrevlog._addrevision(
3302 node,
3302 node,
3303 rawtext,
3303 rawtext,
3304 tr,
3304 tr,
3305 linkrev,
3305 linkrev,
3306 p1,
3306 p1,
3307 p2,
3307 p2,
3308 flags,
3308 flags,
3309 cachedelta,
3309 cachedelta,
3310 deltacomputer=deltacomputer,
3310 deltacomputer=deltacomputer,
3311 sidedata=sidedata,
3311 sidedata=sidedata,
3312 )
3312 )
3313
3313
3314 if addrevisioncb:
3314 if addrevisioncb:
3315 addrevisioncb(self, rev, node)
3315 addrevisioncb(self, rev, node)
3316
3316
3317 def censorrevision(self, tr, censornode, tombstone=b''):
3317 def censorrevision(self, tr, censornode, tombstone=b''):
3318 if self._format_version == REVLOGV0:
3318 if self._format_version == REVLOGV0:
3319 raise error.RevlogError(
3319 raise error.RevlogError(
3320 _(b'cannot censor with version %d revlogs')
3320 _(b'cannot censor with version %d revlogs')
3321 % self._format_version
3321 % self._format_version
3322 )
3322 )
3323 elif self._format_version == REVLOGV1:
3323 elif self._format_version == REVLOGV1:
3324 rewrite.v1_censor(self, tr, censornode, tombstone)
3324 rewrite.v1_censor(self, tr, censornode, tombstone)
3325 else:
3325 else:
3326 rewrite.v2_censor(self, tr, censornode, tombstone)
3326 rewrite.v2_censor(self, tr, censornode, tombstone)
3327
3327
3328 def verifyintegrity(self, state):
3328 def verifyintegrity(self, state):
3329 """Verifies the integrity of the revlog.
3329 """Verifies the integrity of the revlog.
3330
3330
3331 Yields ``revlogproblem`` instances describing problems that are
3331 Yields ``revlogproblem`` instances describing problems that are
3332 found.
3332 found.
3333 """
3333 """
3334 dd, di = self.checksize()
3334 dd, di = self.checksize()
3335 if dd:
3335 if dd:
3336 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3336 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3337 if di:
3337 if di:
3338 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3338 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3339
3339
3340 version = self._format_version
3340 version = self._format_version
3341
3341
3342 # The verifier tells us what version revlog we should be.
3342 # The verifier tells us what version revlog we should be.
3343 if version != state[b'expectedversion']:
3343 if version != state[b'expectedversion']:
3344 yield revlogproblem(
3344 yield revlogproblem(
3345 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3345 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3346 % (self.display_id, version, state[b'expectedversion'])
3346 % (self.display_id, version, state[b'expectedversion'])
3347 )
3347 )
3348
3348
3349 state[b'skipread'] = set()
3349 state[b'skipread'] = set()
3350 state[b'safe_renamed'] = set()
3350 state[b'safe_renamed'] = set()
3351
3351
3352 for rev in self:
3352 for rev in self:
3353 node = self.node(rev)
3353 node = self.node(rev)
3354
3354
3355 # Verify contents. 4 cases to care about:
3355 # Verify contents. 4 cases to care about:
3356 #
3356 #
3357 # common: the most common case
3357 # common: the most common case
3358 # rename: with a rename
3358 # rename: with a rename
3359 # meta: file content starts with b'\1\n', the metadata
3359 # meta: file content starts with b'\1\n', the metadata
3360 # header defined in filelog.py, but without a rename
3360 # header defined in filelog.py, but without a rename
3361 # ext: content stored externally
3361 # ext: content stored externally
3362 #
3362 #
3363 # More formally, their differences are shown below:
3363 # More formally, their differences are shown below:
3364 #
3364 #
3365 # | common | rename | meta | ext
3365 # | common | rename | meta | ext
3366 # -------------------------------------------------------
3366 # -------------------------------------------------------
3367 # flags() | 0 | 0 | 0 | not 0
3367 # flags() | 0 | 0 | 0 | not 0
3368 # renamed() | False | True | False | ?
3368 # renamed() | False | True | False | ?
3369 # rawtext[0:2]=='\1\n'| False | True | True | ?
3369 # rawtext[0:2]=='\1\n'| False | True | True | ?
3370 #
3370 #
3371 # "rawtext" means the raw text stored in revlog data, which
3371 # "rawtext" means the raw text stored in revlog data, which
3372 # could be retrieved by "rawdata(rev)". "text"
3372 # could be retrieved by "rawdata(rev)". "text"
3373 # mentioned below is "revision(rev)".
3373 # mentioned below is "revision(rev)".
3374 #
3374 #
3375 # There are 3 different lengths stored physically:
3375 # There are 3 different lengths stored physically:
3376 # 1. L1: rawsize, stored in revlog index
3376 # 1. L1: rawsize, stored in revlog index
3377 # 2. L2: len(rawtext), stored in revlog data
3377 # 2. L2: len(rawtext), stored in revlog data
3378 # 3. L3: len(text), stored in revlog data if flags==0, or
3378 # 3. L3: len(text), stored in revlog data if flags==0, or
3379 # possibly somewhere else if flags!=0
3379 # possibly somewhere else if flags!=0
3380 #
3380 #
3381 # L1 should be equal to L2. L3 could be different from them.
3381 # L1 should be equal to L2. L3 could be different from them.
3382 # "text" may or may not affect commit hash depending on flag
3382 # "text" may or may not affect commit hash depending on flag
3383 # processors (see flagutil.addflagprocessor).
3383 # processors (see flagutil.addflagprocessor).
3384 #
3384 #
3385 # | common | rename | meta | ext
3385 # | common | rename | meta | ext
3386 # -------------------------------------------------
3386 # -------------------------------------------------
3387 # rawsize() | L1 | L1 | L1 | L1
3387 # rawsize() | L1 | L1 | L1 | L1
3388 # size() | L1 | L2-LM | L1(*) | L1 (?)
3388 # size() | L1 | L2-LM | L1(*) | L1 (?)
3389 # len(rawtext) | L2 | L2 | L2 | L2
3389 # len(rawtext) | L2 | L2 | L2 | L2
3390 # len(text) | L2 | L2 | L2 | L3
3390 # len(text) | L2 | L2 | L2 | L3
3391 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3391 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3392 #
3392 #
3393 # LM: length of metadata, depending on rawtext
3393 # LM: length of metadata, depending on rawtext
3394 # (*): not ideal, see comment in filelog.size
3394 # (*): not ideal, see comment in filelog.size
3395 # (?): could be "- len(meta)" if the resolved content has
3395 # (?): could be "- len(meta)" if the resolved content has
3396 # rename metadata
3396 # rename metadata
3397 #
3397 #
3398 # Checks needed to be done:
3398 # Checks needed to be done:
3399 # 1. length check: L1 == L2, in all cases.
3399 # 1. length check: L1 == L2, in all cases.
3400 # 2. hash check: depending on flag processor, we may need to
3400 # 2. hash check: depending on flag processor, we may need to
3401 # use either "text" (external), or "rawtext" (in revlog).
3401 # use either "text" (external), or "rawtext" (in revlog).
3402
3402
3403 try:
3403 try:
3404 skipflags = state.get(b'skipflags', 0)
3404 skipflags = state.get(b'skipflags', 0)
3405 if skipflags:
3405 if skipflags:
3406 skipflags &= self.flags(rev)
3406 skipflags &= self.flags(rev)
3407
3407
3408 _verify_revision(self, skipflags, state, node)
3408 _verify_revision(self, skipflags, state, node)
3409
3409
3410 l1 = self.rawsize(rev)
3410 l1 = self.rawsize(rev)
3411 l2 = len(self.rawdata(node))
3411 l2 = len(self.rawdata(node))
3412
3412
3413 if l1 != l2:
3413 if l1 != l2:
3414 yield revlogproblem(
3414 yield revlogproblem(
3415 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3415 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3416 node=node,
3416 node=node,
3417 )
3417 )
3418
3418
3419 except error.CensoredNodeError:
3419 except error.CensoredNodeError:
3420 if state[b'erroroncensored']:
3420 if state[b'erroroncensored']:
3421 yield revlogproblem(
3421 yield revlogproblem(
3422 error=_(b'censored file data'), node=node
3422 error=_(b'censored file data'), node=node
3423 )
3423 )
3424 state[b'skipread'].add(node)
3424 state[b'skipread'].add(node)
3425 except Exception as e:
3425 except Exception as e:
3426 yield revlogproblem(
3426 yield revlogproblem(
3427 error=_(b'unpacking %s: %s')
3427 error=_(b'unpacking %s: %s')
3428 % (short(node), stringutil.forcebytestr(e)),
3428 % (short(node), stringutil.forcebytestr(e)),
3429 node=node,
3429 node=node,
3430 )
3430 )
3431 state[b'skipread'].add(node)
3431 state[b'skipread'].add(node)
3432
3432
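# --- hedged usage sketch (annotation): `state` must provide the keys read
# above; each yielded revlogproblem carries `warning`, `error` and `node`.
#
#     state = {b'expectedversion': 1, b'erroroncensored': True}
#     for problem in rlog.verifyintegrity(state):
#         ui.warn(problem.error or problem.warning)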
3433 def storageinfo(
3433 def storageinfo(
3434 self,
3434 self,
3435 exclusivefiles=False,
3435 exclusivefiles=False,
3436 sharedfiles=False,
3436 sharedfiles=False,
3437 revisionscount=False,
3437 revisionscount=False,
3438 trackedsize=False,
3438 trackedsize=False,
3439 storedsize=False,
3439 storedsize=False,
3440 ):
3440 ):
3441 d = {}
3441 d = {}
3442
3442
3443 if exclusivefiles:
3443 if exclusivefiles:
3444 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3444 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3445 if not self._inline:
3445 if not self._inline:
3446 d[b'exclusivefiles'].append((self.opener, self._datafile))
3446 d[b'exclusivefiles'].append((self.opener, self._datafile))
3447
3447
3448 if sharedfiles:
3448 if sharedfiles:
3449 d[b'sharedfiles'] = []
3449 d[b'sharedfiles'] = []
3450
3450
3451 if revisionscount:
3451 if revisionscount:
3452 d[b'revisionscount'] = len(self)
3452 d[b'revisionscount'] = len(self)
3453
3453
3454 if trackedsize:
3454 if trackedsize:
3455 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3455 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3456
3456
3457 if storedsize:
3457 if storedsize:
3458 d[b'storedsize'] = sum(
3458 d[b'storedsize'] = sum(
3459 self.opener.stat(path).st_size for path in self.files()
3459 self.opener.stat(path).st_size for path in self.files()
3460 )
3460 )
3461
3461
3462 return d
3462 return d
3463
3463
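# --- hedged usage sketch (annotation; `rlog` hypothetical):
#
#     info = rlog.storageinfo(revisionscount=True, storedsize=True)
#     # e.g. {b'revisionscount': 42, b'storedsize': 123456}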
3464 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3464 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3465 if not self.hassidedata:
3465 if not self.hassidedata:
3466 return
3466 return
3467 # revlog formats with sidedata support do not support inline
3467 # revlog formats with sidedata support do not support inline
3468 assert not self._inline
3468 assert not self._inline
3469 if not helpers[1] and not helpers[2]:
3469 if not helpers[1] and not helpers[2]:
3470 # Nothing to generate or remove
3470 # Nothing to generate or remove
3471 return
3471 return
3472
3472
3473 new_entries = []
3473 new_entries = []
3474 # append the new sidedata
3474 # append the new sidedata
3475 with self._writing(transaction):
3475 with self._writing(transaction):
3476 ifh, dfh, sdfh = self._writinghandles
3476 ifh, dfh, sdfh = self._writinghandles
3477 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3477 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3478
3478
3479 current_offset = sdfh.tell()
3479 current_offset = sdfh.tell()
3480 for rev in range(startrev, endrev + 1):
3480 for rev in range(startrev, endrev + 1):
3481 entry = self.index[rev]
3481 entry = self.index[rev]
3482 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3482 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3483 store=self,
3483 store=self,
3484 sidedata_helpers=helpers,
3484 sidedata_helpers=helpers,
3485 sidedata={},
3485 sidedata={},
3486 rev=rev,
3486 rev=rev,
3487 )
3487 )
3488
3488
3489 serialized_sidedata = sidedatautil.serialize_sidedata(
3489 serialized_sidedata = sidedatautil.serialize_sidedata(
3490 new_sidedata
3490 new_sidedata
3491 )
3491 )
3492
3492
3493 sidedata_compression_mode = COMP_MODE_INLINE
3493 sidedata_compression_mode = COMP_MODE_INLINE
3494 if serialized_sidedata and self.hassidedata:
3494 if serialized_sidedata and self.hassidedata:
3495 sidedata_compression_mode = COMP_MODE_PLAIN
3495 sidedata_compression_mode = COMP_MODE_PLAIN
3496 h, comp_sidedata = self.compress(serialized_sidedata)
3496 h, comp_sidedata = self.compress(serialized_sidedata)
3497 if (
3497 if (
3498 h != b'u'
3498 h != b'u'
3499 and comp_sidedata[0] != b'\0'
3499 and comp_sidedata[0] != b'\0'
3500 and len(comp_sidedata) < len(serialized_sidedata)
3500 and len(comp_sidedata) < len(serialized_sidedata)
3501 ):
3501 ):
3502 assert not h
3502 assert not h
3503 if (
3503 if (
3504 comp_sidedata[0]
3504 comp_sidedata[0]
3505 == self._docket.default_compression_header
3505 == self._docket.default_compression_header
3506 ):
3506 ):
3507 sidedata_compression_mode = COMP_MODE_DEFAULT
3507 sidedata_compression_mode = COMP_MODE_DEFAULT
3508 serialized_sidedata = comp_sidedata
3508 serialized_sidedata = comp_sidedata
3509 else:
3509 else:
3510 sidedata_compression_mode = COMP_MODE_INLINE
3510 sidedata_compression_mode = COMP_MODE_INLINE
3511 serialized_sidedata = comp_sidedata
3511 serialized_sidedata = comp_sidedata
3512 if entry[8] != 0 or entry[9] != 0:
3512 if entry[8] != 0 or entry[9] != 0:
3513 # rewriting entries that already have sidedata is not
3513 # rewriting entries that already have sidedata is not
3514 # supported yet, because it introduces garbage data in the
3514 # supported yet, because it introduces garbage data in the
3515 # revlog.
3515 # revlog.
3516 msg = b"rewriting existing sidedata is not supported yet"
3516 msg = b"rewriting existing sidedata is not supported yet"
3517 raise error.Abort(msg)
3517 raise error.Abort(msg)
3518
3518
3519 # Apply (potential) flags to add and to remove after running
3519 # Apply (potential) flags to add and to remove after running
3520 # the sidedata helpers
3520 # the sidedata helpers
3521 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3521 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3522 entry_update = (
3522 entry_update = (
3523 current_offset,
3523 current_offset,
3524 len(serialized_sidedata),
3524 len(serialized_sidedata),
3525 new_offset_flags,
3525 new_offset_flags,
3526 sidedata_compression_mode,
3526 sidedata_compression_mode,
3527 )
3527 )
3528
3528
3529 # the sidedata computation might have moved the file cursors around
3529 # the sidedata computation might have moved the file cursors around
3530 sdfh.seek(current_offset, os.SEEK_SET)
3530 sdfh.seek(current_offset, os.SEEK_SET)
3531 sdfh.write(serialized_sidedata)
3531 sdfh.write(serialized_sidedata)
3532 new_entries.append(entry_update)
3532 new_entries.append(entry_update)
3533 current_offset += len(serialized_sidedata)
3533 current_offset += len(serialized_sidedata)
3534 self._docket.sidedata_end = sdfh.tell()
3534 self._docket.sidedata_end = sdfh.tell()
3535
3535
3536 # rewrite the new index entries
3536 # rewrite the new index entries
3537 ifh.seek(startrev * self.index.entry_size)
3537 ifh.seek(startrev * self.index.entry_size)
3538 for i, e in enumerate(new_entries):
3538 for i, e in enumerate(new_entries):
3539 rev = startrev + i
3539 rev = startrev + i
3540 self.index.replace_sidedata_info(rev, *e)
3540 self.index.replace_sidedata_info(rev, *e)
3541 packed = self.index.entry_binary(rev)
3541 packed = self.index.entry_binary(rev)
3542 if rev == 0 and self._docket is None:
3542 if rev == 0 and self._docket is None:
3543 header = self._format_flags | self._format_version
3543 header = self._format_flags | self._format_version
3544 header = self.index.pack_header(header)
3544 header = self.index.pack_header(header)
3545 packed = header + packed
3545 packed = header + packed
3546 ifh.write(packed)
3546 ifh.write(packed)
@@ -1,523 +1,523 b''
1 # test revlog interaction about raw data (flagprocessor)
1 # test revlog interaction about raw data (flagprocessor)
2
2
3
3
4 import hashlib
4 import hashlib
5 import sys
5 import sys
6
6
7 from mercurial import (
7 from mercurial import (
8 encoding,
8 encoding,
9 revlog,
9 revlog,
10 transaction,
10 transaction,
11 vfs,
11 vfs,
12 )
12 )
13
13
14 from mercurial.revlogutils import (
14 from mercurial.revlogutils import (
15 constants,
15 constants,
16 deltas,
16 deltas,
17 flagutil,
17 flagutil,
18 )
18 )
19
19
20
20
21 class _NoTransaction:
21 class _NoTransaction:
22 """transaction like object to update the nodemap outside a transaction"""
22 """transaction like object to update the nodemap outside a transaction"""
23
23
24 def __init__(self):
24 def __init__(self):
25 self._postclose = {}
25 self._postclose = {}
26
26
27 def addpostclose(self, callback_id, callback_func):
27 def addpostclose(self, callback_id, callback_func):
28 self._postclose[callback_id] = callback_func
28 self._postclose[callback_id] = callback_func
29
29
30 def registertmp(self, *args, **kwargs):
30 def registertmp(self, *args, **kwargs):
31 pass
31 pass
32
32
33 def addbackup(self, *args, **kwargs):
33 def addbackup(self, *args, **kwargs):
34 pass
34 pass
35
35
36 def add(self, *args, **kwargs):
36 def add(self, *args, **kwargs):
37 pass
37 pass
38
38
39 def addabort(self, *args, **kwargs):
39 def addabort(self, *args, **kwargs):
40 pass
40 pass
41
41
42 def _report(self, *args):
42 def _report(self, *args):
43 pass
43 pass
44
44
45
45
46 # TESTTMP is optional. This makes it convenient to run without run-tests.py
46 # TESTTMP is optional. This makes it convenient to run without run-tests.py
47 tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))
47 tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))
48
48
49 # Enable generaldelta otherwise revlog won't use delta as expected by the test
49 # Enable generaldelta otherwise revlog won't use delta as expected by the test
50 tvfs.options = {
50 tvfs.options = {
51 b'generaldelta': True,
51 b'generaldelta': True,
52 b'revlogv1': True,
52 b'revlogv1': True,
53 b'sparse-revlog': True,
53 b'sparse-revlog': True,
54 }
54 }
55
55
56
56
57 def abort(msg):
57 def abort(msg):
58 print('abort: %s' % msg)
58 print('abort: %s' % msg)
59 # Return 0 so run-tests.py could compare the output.
59 # Return 0 so run-tests.py could compare the output.
60 sys.exit()
60 sys.exit()
61
61
62
62
63 # Register a revlog processor for flag EXTSTORED.
63 # Register a revlog processor for flag EXTSTORED.
64 #
64 #
65 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
65 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
66 # insertion and replacement, and may be interesting to test revlog's line-based
66 # insertion and replacement, and may be interesting to test revlog's line-based
67 # deltas.
67 # deltas.
68 _extheader = b'E\n'
68 _extheader = b'E\n'
69
69
70
70
71 def readprocessor(self, rawtext):
71 def readprocessor(self, rawtext):
72 # True: the returned text could be used to verify hash
72 # True: the returned text could be used to verify hash
73 text = rawtext[len(_extheader) :].replace(b'i', b'1')
73 text = rawtext[len(_extheader) :].replace(b'i', b'1')
74 return text, True
74 return text, True
75
75
76
76
77 def writeprocessor(self, text):
77 def writeprocessor(self, text):
78 # False: the returned rawtext shouldn't be used to verify hash
78 # False: the returned rawtext shouldn't be used to verify hash
79 rawtext = _extheader + text.replace(b'1', b'i')
79 rawtext = _extheader + text.replace(b'1', b'i')
80 return rawtext, False
80 return rawtext, False
81
81
82
82
83 def rawprocessor(self, rawtext):
83 def rawprocessor(self, rawtext):
84 # False: do not verify hash. Only the content returned by "readprocessor"
84 # False: do not verify hash. Only the content returned by "readprocessor"
85 # can be used to verify hash.
85 # can be used to verify hash.
86 return False
86 return False
87
87
88
88
89 flagutil.addflagprocessor(
89 flagutil.addflagprocessor(
90 revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
90 revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
91 )
91 )
92
92
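# --- hedged demo (annotation, not part of the test): for any text without
# a literal 'i', the write/read processors above round-trip:
#
#     rawtext, _ = writeprocessor(None, b'1 one 11\n')  # b'E\ni one ii\n'
#     assert readprocessor(None, rawtext) == (b'1 one 11\n', True)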
93 # Utilities about reading and appending revlog
93 # Utilities about reading and appending revlog
94
94
95
95
96 def newtransaction():
96 def newtransaction():
97 # A transaction is required to write revlogs
97 # A transaction is required to write revlogs
98 report = lambda msg: None
98 report = lambda msg: None
99 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
99 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
100
100
101
101
102 def newrevlog(name=b'_testrevlog', recreate=False):
102 def newrevlog(name=b'_testrevlog', recreate=False):
103 if recreate:
103 if recreate:
104 tvfs.tryunlink(name + b'.i')
104 tvfs.tryunlink(name + b'.i')
105 target = (constants.KIND_OTHER, b'test')
105 target = (constants.KIND_OTHER, b'test')
106 rlog = revlog.revlog(tvfs, target=target, radix=name)
106 rlog = revlog.revlog(tvfs, target=target, radix=name)
107 return rlog
107 return rlog

def appendrev(rlog, text, tr, isext=False, isdelta=True):
    """Append a revision. If isext is True, set the EXTSTORED flag so the
    flag processor will be used (and rawtext will differ from text). If
    isdelta is True, force the revision to be stored as a delta; otherwise
    it is stored as a full text.
    """
    nextrev = len(rlog)
    p1 = rlog.node(nextrev - 1)
    p2 = rlog.nullid
    if isext:
        flags = revlog.REVIDX_EXTSTORED
    else:
        flags = revlog.REVIDX_DEFAULT_FLAGS
    # Change storedeltachains temporarily, to override revlog's delta decision
    rlog._storedeltachains = isdelta
    try:
        rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
        return nextrev
    except Exception as ex:
        abort('rev %d: failed to append: %s' % (nextrev, ex))
    finally:
        # Restore storedeltachains. It is always True, see revlog.__init__
        rlog._storedeltachains = True
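
# Hypothetical usage of the helpers above (a sketch; names are from this
# file, and the snippet is not exercised by the test itself):
#
#     with newtransaction() as tr:
#         rlog = newrevlog(recreate=True)
#         rev0 = appendrev(rlog, b'a\n', tr, isdelta=False)  # full text
#         rev1 = appendrev(rlog, b'a\nb\n', tr)  # stored as a delta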

def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True):
    """Copy revlog to destname using revlog.addgroup. Return the copied revlog.

    This emulates a push or pull, which would use a changegroup. A changegroup
    requires a repo to work; we don't have one, so a dummy changegroup is used.

    If optimaldelta is True, use the optimal delta parent, so the destination
    revlog can probably reuse it. Otherwise, build a sub-optimal delta, which
    the destination revlog needs more work to use.

    This exercises some revlog.addgroup (and revlog._addrevision(text=None))
    code paths, which are not covered by "appendrev" alone.
    """

    class dummychangegroup:
        @staticmethod
        def deltachunk(pnode):
            pnode = pnode or rlog.nullid
            parentrev = rlog.rev(pnode)
            r = parentrev + 1
            if r >= len(rlog):
                return {}
            if optimaldelta:
                deltaparent = parentrev
            else:
                # suboptimal deltaparent
                deltaparent = min(0, parentrev)
            if not rlog._candelta(deltaparent, r):
                deltaparent = -1
            return {
                b'node': rlog.node(r),
                b'p1': pnode,
                b'p2': rlog.nullid,
                b'cs': rlog.node(rlog.linkrev(r)),
                b'flags': rlog.flags(r),
                b'deltabase': rlog.node(deltaparent),
                b'delta': rlog.revdiff(deltaparent, r),
                b'sidedata': rlog.sidedata(r),
            }

        def deltaiter(self):
            chain = None
            for chunkdata in iter(lambda: self.deltachunk(chain), {}):
                node = chunkdata[b'node']
                p1 = chunkdata[b'p1']
                p2 = chunkdata[b'p2']
                cs = chunkdata[b'cs']
                deltabase = chunkdata[b'deltabase']
                delta = chunkdata[b'delta']
                flags = chunkdata[b'flags']
                sidedata = chunkdata[b'sidedata']

                chain = node

                yield (node, p1, p2, cs, deltabase, delta, flags, sidedata)

    def linkmap(lnode):
        return rlog.rev(lnode)

    dlog = newrevlog(destname, recreate=True)
    dummydeltas = dummychangegroup().deltaiter()
    dlog.addgroup(dummydeltas, linkmap, tr)
    return dlog

def lowlevelcopy(rlog, tr, destname=b'_destrevlog'):
    """Like addgroupcopy, but use the low-level revlog._addrevision directly.

    It exercises some code paths that are hard to reach otherwise.
    """
    dlog = newrevlog(destname, recreate=True)
    for r in rlog:
        p1 = rlog.node(r - 1)
        p2 = rlog.nullid
        if r == 0 or (rlog.flags(r) & revlog.REVIDX_EXTSTORED):
            text = rlog.rawdata(r)
            cachedelta = None
        else:
            # deltaparent cannot have the EXTSTORED flag.
            deltaparent = max(
                [-1]
                + [
                    p
                    for p in range(r)
                    if rlog.flags(p) & revlog.REVIDX_EXTSTORED == 0
                ]
            )
            text = None
            cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
        flags = rlog.flags(r)
        with dlog._writing(_NoTransaction()):
            dlog._addrevision(
                rlog.node(r),
                text,
                tr,
                r,
                p1,
                p2,
                flags,
                cachedelta,
            )
    return dlog


# Utilities to generate revisions for testing

def genbits(n):
    """Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n),
    i.e. the generated numbers have a width of n bits.

    The combination of two adjacent numbers will cover all possible cases.
    That is to say, given any x, y where both x and y are in range(2 ** n),
    there is an x followed immediately by y in the generated sequence.
    """
    m = 2 ** n

    # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
    gray = lambda x: x ^ (x >> 1)
    reversegray = {gray(i): i for i in range(m)}

    # Generate (n * 2) bit gray code, yield the lower n bits as X, and look
    # for the next unused gray code where the higher n bits equal X.

    # For gray codes whose higher bits are X, a[X] of them have been used.
    a = [0] * m

    # Iterate from 0.
    x = 0
    yield x
    for i in range(m * m):
        x = reversegray[x]
        y = gray(a[x] + x * m) & (m - 1)
        assert a[x] < m
        a[x] += 1
        x = y
        yield x
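
# A quick check of the coverage property documented above (an inline
# sketch; cheap enough to run for a small n):
_seq = list(genbits(3))
assert len(_seq) == 2 ** 6 + 1
_pairs = {(_seq[i], _seq[i + 1]) for i in range(len(_seq) - 1)}
assert len(_pairs) == 2 ** 6  # all 64 (x, y) combinations occur exactly once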

def gentext(rev):
    '''Given a revision number, generate dummy text'''
    return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
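
# For illustration: the texts cycle with period 5 and may be multi-line, e.g.
#     gentext(0) == b'-1\n'
#     gentext(3) == b'-1\n0\n1\n2\n'
#     gentext(5) == b'-1\n'  # same as gentext(0)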

def writecases(rlog, tr):
    """Write some revisions of interest to the test.

    The test is interested in 3 properties of a revision:

    - Is it a delta or a full text? (isdelta)
      This is to catch some delta application issues.
    - Does it have the EXTSTORED flag? (isext)
      This is to catch some flag processor issues, especially when it
      interacts with revlog deltas.
    - Is its text empty? (isempty)
      This is less important. It is intended to try to catch some careless
      checks like "if text" instead of "if text is None". Note: if a flag
      processor is involved, the raw text may not be empty.

    Write 65 revisions, so that all combinations of the above flags for
    adjacent revisions are covered. That is to say,

        len(set(
            (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
            for r in range(len(rlog) - 1)
        )) == 64.

    Where "r.delta", "r.ext", and "r.empty" are booleans matching the
    properties mentioned above.

    Return expected [(text, rawtext)].
    """
    result = []
    for i, x in enumerate(genbits(3)):
        isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
        if isempty:
            text = b''
        else:
            text = gentext(i)
        rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)

        # Verify text, rawtext, and rawsize
        if isext:
            rawtext = writeprocessor(None, text)[0]
        else:
            rawtext = text
        if rlog.rawsize(rev) != len(rawtext):
            abort('rev %d: wrong rawsize' % rev)
        if rlog.revision(rev) != text:
            abort('rev %d: wrong text' % rev)
        if rlog.rawdata(rev) != rawtext:
            abort('rev %d: wrong rawtext' % rev)
        result.append((text, rawtext))

        # Verify flags like isdelta, isext work as expected
        # isdelta can be overridden to False if this or p1 has isext set
        if bool(rlog.deltaparent(rev) > -1) and not isdelta:
            abort('rev %d: isdelta is unexpected' % rev)
        if bool(rlog.flags(rev)) != isext:
            abort('rev %d: isext is ineffective' % rev)
    return result
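
# Why 65 revisions suffice: genbits(3) yields 2 ** 6 + 1 = 65 values, so the
# 64 possible (previous flags, current flags) transitions each appear once
# among adjacent revisions (see the coverage check after genbits above).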

# Main test and checking


def checkrevlog(rlog, expected):
    '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
    # Test using different access orders. This could expose some issues
    # depending on revlog caching (see revlog._cache).
    for r0 in range(len(rlog) - 1):
        r1 = r0 + 1
        for revorder in [[r0, r1], [r1, r0]]:
            for raworder in [[True], [False], [True, False], [False, True]]:
                nlog = newrevlog()
                for rev in revorder:
                    for raw in raworder:
                        if raw:
                            t = nlog.rawdata(rev)
                        else:
                            t = nlog.revision(rev)
                        if t != expected[rev][int(raw)]:
                            abort(
                                'rev %d: corrupted %stext'
                                % (rev, raw and 'raw' or '')
                            )

slicingdata = [
    ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
    ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
    (
        [-1, 0, 1, 2, 3, 55, 56, 58, 59, 60],
        [[-1, 0, 1], [2], [58], [59, 60]],
        10,
    ),
]

def slicingtest(rlog):
    oldmin = rlog._srmingapsize
    try:
        # the test revlog is small; we remove the floor under which
        # slicing is disregarded.
        rlog._srmingapsize = 0
        for item in slicingdata:
            chain, expected, target = item
            result = deltas.slicechunk(rlog, chain, targetsize=target)
            result = list(result)
            if result != expected:
                print('slicing differ:')
                print(' chain: %s' % chain)
                print(' target: %s' % target)
                print(' expected: %s' % expected)
                print(' result: %s' % result)
    finally:
        rlog._srmingapsize = oldmin

def md5sum(s):
    return hashlib.md5(s).digest()


def _maketext(*coord):
    """create a piece of text according to ranges of integers

    The text returned uses an md5sum of each integer to make it less
    compressible"""
    pieces = []
    for start, size in coord:
        num = range(start, start + size)
        p = [md5sum(b'%d' % r) for r in num]
        pieces.append(b'\n'.join(p))
    return b'\n'.join(pieces) + b'\n'
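
# Shape of the generated text (an inline sketch): each (start, size)
# coordinate expands to `size` 16-byte md5 digests joined by newlines, so
# texts built from overlapping integer ranges share long common spans that
# delta well against each other.
assert _maketext((0, 2)) == md5sum(b'0') + b'\n' + md5sum(b'1') + b'\n'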

data = [
    _maketext((0, 120), (456, 60)),
    _maketext((0, 120), (345, 60)),
    _maketext((0, 120), (734, 60)),
    _maketext((0, 120), (734, 60), (923, 45)),
    _maketext((0, 120), (734, 60), (234, 45)),
    _maketext((0, 120), (734, 60), (564, 45)),
    _maketext((0, 120), (734, 60), (361, 45)),
    _maketext((0, 120), (734, 60), (489, 45)),
    _maketext((0, 120), (123, 60)),
    _maketext((0, 120), (145, 60)),
    _maketext((0, 120), (104, 60)),
    _maketext((0, 120), (430, 60)),
    _maketext((0, 120), (430, 60), (923, 45)),
    _maketext((0, 120), (430, 60), (234, 45)),
    _maketext((0, 120), (430, 60), (564, 45)),
    _maketext((0, 120), (430, 60), (361, 45)),
    _maketext((0, 120), (430, 60), (489, 45)),
    _maketext((0, 120), (249, 60)),
    _maketext((0, 120), (832, 60)),
    _maketext((0, 120), (891, 60)),
    _maketext((0, 120), (543, 60)),
    _maketext((0, 120), (120, 60)),
    _maketext((0, 120), (60, 60), (768, 30)),
    _maketext((0, 120), (60, 60), (260, 30)),
    _maketext((0, 120), (60, 60), (450, 30)),
    _maketext((0, 120), (60, 60), (361, 30)),
    _maketext((0, 120), (60, 60), (886, 30)),
    _maketext((0, 120), (60, 60), (116, 30)),
    _maketext((0, 120), (60, 60), (567, 30), (629, 40)),
    _maketext((0, 120), (60, 60), (569, 30), (745, 40)),
    _maketext((0, 120), (60, 60), (777, 30), (700, 40)),
    _maketext((0, 120), (60, 60), (618, 30), (398, 40), (158, 10)),
]

def makesnapshot(tr):
    rl = newrevlog(name=b'_snaprevlog3', recreate=True)
    for i in data:
        appendrev(rl, i, tr)
    return rl


snapshots = [-1, 0, 6, 8, 11, 17, 19, 21, 25, 30]

def issnapshottest(rlog):
    result = []
    if rlog.issnapshot(-1):
        result.append(-1)
    for rev in rlog:
        if rlog.issnapshot(rev):
            result.append(rev)
    if snapshots != result:
        print('snapshot differ:')
        print(' expected: %s' % snapshots)
        print(' got: %s' % result)
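
# Background sketch of the sparse-revlog snapshot rule, under the standard
# definition (the real revlog.issnapshot may differ in details): a full text
# is a snapshot, and a delta is an intermediate snapshot iff its base is not
# one of its parents and is itself a snapshot.
#
#     def issnapshot_sketch(rlog, rev):
#         if rev == -1:  # nullrev
#             return True
#         base = rlog.deltaparent(rev)
#         if base == -1:
#             return True
#         if base in rlog.parentrevs(rev):
#             return False
#         return issnapshot_sketch(rlog, base)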

snapshotmapall = {0: {6, 8, 11, 17, 19, 25}, 8: {21}, -1: {0, 30}}
snapshotmap15 = {0: {17, 19, 25}, 8: {21}, -1: {30}}
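
# Reading the maps (a sketch of the assumed semantics): keys are the bases
# that snapshots delta against (-1 for full snapshots), and values are the
# snapshot revisions built on that base. The "15" variant passes a start
# revision to SnapshotCache.update, so only snapshots >= 15 are collected.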

def findsnapshottest(rlog):
    cache = deltas.SnapshotCache()
    cache.update(rlog)
    resultall = dict(cache.snapshots)
    if resultall != snapshotmapall:
        print('snapshot map differ:')
        print(' expected: %s' % snapshotmapall)
        print(' got: %s' % resultall)
    cache15 = deltas.SnapshotCache()
    cache15.update(rlog, 15)
    result15 = dict(cache15.snapshots)
    if result15 != snapshotmap15:
        print('snapshot map differ:')
        print(' expected: %s' % snapshotmap15)
        print(' got: %s' % result15)

def maintest():
    with newtransaction() as tr:
        rl = newrevlog(recreate=True)
        expected = writecases(rl, tr)
        checkrevlog(rl, expected)
        print('local test passed')
        # Copy via revlog.addgroup
        rl1 = addgroupcopy(rl, tr)
        checkrevlog(rl1, expected)
        rl2 = addgroupcopy(rl, tr, optimaldelta=False)
        checkrevlog(rl2, expected)
        print('addgroupcopy test passed')
        # Copy via revlog.clone
        rl3 = newrevlog(name=b'_destrevlog3', recreate=True)
        rl.clone(tr, rl3)
        checkrevlog(rl3, expected)
        print('clone test passed')
        # Copy via low-level revlog._addrevision
        rl4 = lowlevelcopy(rl, tr)
        checkrevlog(rl4, expected)
        print('lowlevelcopy test passed')
        slicingtest(rl)
        print('slicing test passed')
        rl5 = makesnapshot(tr)
        issnapshottest(rl5)
        print('issnapshot test passed')
        findsnapshottest(rl5)
        print('findsnapshot test passed')

try:
    maintest()
except Exception as ex:
    abort('crashed: %s' % ex)