revlog: apply compression mode while reading sidedata...
marmoute - r48031:87d05713 default
@@ -1,3390 +1,3402 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
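

# Illustrative note (not part of the original source): offset_type() packs a
# byte offset and a 16-bit flag field into a single integer, mirroring the
# layout of index entry field [0]. For example, an offset of 1024 with no
# flags gives:
#
#   offset_type(1024, 0) == 1024 << 16 == 67108864
#
# The reverse mapping used throughout this file is:
#
#   offset = field >> 16
#   flags = field & 0xFFFF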


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
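
    # Illustrative sketch (hypothetical values, not from the original code):
    # a caller that only has a delta against ``baserev`` would leave the
    # fulltext cache empty and set ``cachedelta`` instead:
    #
    #   info = _revisioninfo(
    #       node=expected_node,
    #       p1=p1rev,
    #       p2=p2rev,
    #       btext=[None],
    #       textlen=fulltext_length,
    #       cachedelta=(baserev, delta),
    #       flags=0,
    #   )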


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries",
    tuple objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items
    (sometimes not having them at all), but the tuple will always be the same.
    New fields are usually added at the end to avoid breaking existing code
    that relies on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
            A flag field that carries special information or changes the
            behavior of the revision. (see `REVIDX_*` constants for details)
            The flag field only occupies the first 16 bits of this field,
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but test, debug, or performance measurement code might not
        set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
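
        # Note (added for clarity): for a positive integer x,
        # (x & (x - 1)) == 0 exactly when x is a power of two, e.g.
        # 65536 & 65535 == 0 while 65537 & 65536 != 0; the second check
        # below relies on this.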
        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
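        # Note (added for clarity, not in the original source): with
        # REVLOGV1 == 1 and FLAG_INLINE_DATA == 1 << 16, a header of
        # 0x10001 splits into _format_flags == 0x10000 and
        # _format_version == 1.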

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)
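
    # Note (added for clarity, not in the original source): chunks stored
    # with COMP_MODE_INLINE carry their compression header as the first byte
    # of the chunk itself (e.g. b'u' for uncompressed, b'x' for zlib), while
    # chunks stored with COMP_MODE_DEFAULT omit that byte and rely on the
    # docket's default compression header, which the property below resolves
    # once per revlog.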
    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead.
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog.
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base
922 index = self.index
922 index = self.index
923 iterrev = rev
923 iterrev = rev
924 base = index[iterrev][3]
924 base = index[iterrev][3]
925 while base != iterrev:
925 while base != iterrev:
926 iterrev = base
926 iterrev = base
927 base = index[iterrev][3]
927 base = index[iterrev][3]
928
928
929 self._chainbasecache[rev] = base
929 self._chainbasecache[rev] = base
930 return base
930 return base
931
931
932 def linkrev(self, rev):
932 def linkrev(self, rev):
933 return self.index[rev][4]
933 return self.index[rev][4]
934
934
935 def parentrevs(self, rev):
935 def parentrevs(self, rev):
936 try:
936 try:
937 entry = self.index[rev]
937 entry = self.index[rev]
938 except IndexError:
938 except IndexError:
939 if rev == wdirrev:
939 if rev == wdirrev:
940 raise error.WdirUnsupported
940 raise error.WdirUnsupported
941 raise
941 raise
942 if entry[5] == nullrev:
942 if entry[5] == nullrev:
943 return entry[6], entry[5]
943 return entry[6], entry[5]
944 else:
944 else:
945 return entry[5], entry[6]
945 return entry[5], entry[6]
946
946
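    # An explanatory note (not original code): ``parentrevs`` normalizes a
    # null first parent into the second slot, so a single real parent
    # always comes first:
    #
    #   entry[5], entry[6] == (-1, 5)  ->  parentrevs(rev) == (5, -1)
    #   entry[5], entry[6] == (3, -1)  ->  parentrevs(rev) == (3, -1)
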
    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

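    # A minimal usage sketch (hypothetical caller ``rl``, conceptual only,
    # not part of revlog.py): reconstructing a revision amounts to fetching
    # the chunk of every revision in its delta chain and applying the
    # deltas in order on top of the full-text base:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   # chain[0] holds the full text when ``stopped`` is False;
    #   # later entries are deltas stacked on top of it.
    #   text = rl._chunk(chain[0])
    #   for r in chain[1:]:
    #       text = mdiff.patch(text, rl._chunk(r))
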
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

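    # A toy illustration (assumed DAG, not part of the original file) of
    # the ``::common, (::heads) - (::common)`` split above, for a linear
    # history 0 <- 1 <- 2 <- 3 with common=[node(1)] and heads=[node(3)]:
    #
    #   has      covers revs {-1, 0, 1}      # ::common, inclusive
    #   missing  is [node(2), node(3)]       # (::heads) - (::common)
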
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

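    # A worked example (assumed tiny repository, not original code): take
    # revs 2 and 3 both with parent 1, and rev 1 with parent 0. Every rev
    # is first marked 1 in ``ishead``, then each rev clears its parents'
    # slots; the extra trailing slot absorbs writes for nullrev (-1):
    #
    #   ishead ends as [0, 0, 1, 1, 0]  ->  _headrevs() == [2, 3]
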
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

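    # Why the ``a > b`` early return above is safe (explanatory note, not
    # original code): revision numbers are assigned in topological order,
    # so an ancestor never has a higher revision number than its
    # descendant. For example:
    #
    #   isancestorrev(7, 3)  ->  False immediately, no graph walk needed
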
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

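    # A hedged summary (illustrative inputs, not original code) of the
    # resolution order implemented by _match() and _partialmatch():
    #
    #   rl.lookup(5)            # int: revision number
    #   rl.lookup(b'5')         # str(rev); negative revs count from the end
    #   rl.lookup(binary_node)  # 20-byte binary nodeid
    #   rl.lookup(b'a1b2c3')    # unambiguous hex-nodeid prefix
    #
    # Ambiguous prefixes raise AmbiguousPrefixLookupError; anything else
    # unresolvable raises LookupError(b'no match found').
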
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

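    # For orientation, a sketch of the hashing convention (assuming the
    # classic SHA-1 revlog format): ``storageutil.hashrevisionsha1`` hashes
    # the two parent nodes in sorted order followed by the text, so cmp()
    # detects both content and parentage changes without decompressing the
    # stored revision:
    #
    #   import hashlib
    #   def hashrevisionsha1_sketch(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
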
    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and its
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

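    # Worked numbers for the alignment above (illustrative values, assuming
    # the default 64KB cache window): a read of 100 bytes at offset 70000
    # is widened to whole cache-aligned blocks:
    #
    #   cachesize  = 65536
    #   realoffset = 70000 & ~(65536 - 1)                # = 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535)    # = 131072
    #                - realoffset                        # = 65536
    #
    # so one 64KB block is read and cached, and the requested 100 bytes are
    # returned as a buffer slice into it.
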
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

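    # A layout sketch (illustrative, assuming an inline revlog with the
    # 64-byte v1 index entries): index entries and revision data are
    # interleaved in the .i file, so rev r's data sits after r + 1 index
    # entries (one per revision up to and including r):
    #
    #   [entry 0][data 0][entry 1][data 1][entry 2][data 2]...
    #
    # hence ``start += (startrev + 1) * self.index.entry_size`` above.
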
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

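    # A summary sketch (a reading of the constants above, not original
    # code): the per-revision compression mode stored in
    # ``self.index[rev][10]`` selects how a chunk is decoded:
    #
    #   COMP_MODE_PLAIN    stored as-is, returned untouched
    #   COMP_MODE_DEFAULT  decoded with the revlog's preconfigured
    #                      ``self._decompressor`` (no per-chunk header)
    #   COMP_MODE_INLINE   decoded by ``self.decompress``, which sniffs
    #                      the engine from the chunk's first byte
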
1801 def _chunks(self, revs, df=None, targetsize=None):
1801 def _chunks(self, revs, df=None, targetsize=None):
1802 """Obtain decompressed chunks for the specified revisions.
1802 """Obtain decompressed chunks for the specified revisions.
1803
1803
1804 Accepts an iterable of numeric revisions that are assumed to be in
1804 Accepts an iterable of numeric revisions that are assumed to be in
1805 ascending order. Also accepts an optional already-open file handle
1805 ascending order. Also accepts an optional already-open file handle
1806 to be used for reading. If used, the seek position of the file will
1806 to be used for reading. If used, the seek position of the file will
1807 not be preserved.
1807 not be preserved.
1808
1808
1809 This function is similar to calling ``self._chunk()`` multiple times,
1809 This function is similar to calling ``self._chunk()`` multiple times,
1810 but is faster.
1810 but is faster.
1811
1811
1812 Returns a list with decompressed data for each requested revision.
1812 Returns a list with decompressed data for each requested revision.
1813 """
1813 """
1814 if not revs:
1814 if not revs:
1815 return []
1815 return []
1816 start = self.start
1816 start = self.start
1817 length = self.length
1817 length = self.length
1818 inline = self._inline
1818 inline = self._inline
1819 iosize = self.index.entry_size
1819 iosize = self.index.entry_size
1820 buffer = util.buffer
1820 buffer = util.buffer
1821
1821
1822 l = []
1822 l = []
1823 ladd = l.append
1823 ladd = l.append
1824
1824
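        # With sparse-read enabled, deltautil.slicechunk groups the requested
        # revisions into contiguous read windows (bounded by targetsize), so
        # a delta chain can be fetched with a few large reads instead of many
        # small ones.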
        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
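        # A snapshot is a revision stored as a full text (a "root" snapshot)
        # or, with sparse-revlog, as a delta against another snapshot rather
        # than against one of its parents (an intermediate snapshot).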
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
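        # Strategy: walk the delta chain down to a full text, stopping early
        # at the cached revision when possible, so that only the deltas on
        # top of the cached text need to be read and applied.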

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

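        # entry[11] is the sidedata counterpart of entry[10], the data
        # compression mode; both fields are filled in by _addrevision.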
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
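        # Unlike ``revision``, flag-processor read transforms are not applied
        # here; callers get the bytes exactly as stored in the revlog.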
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
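        # Conversion outline: stream every revision's chunk out of the inline
        # .i file into a new .d file, then rewrite the index without the
        # interleaved data and without FLAG_INLINE_DATA.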
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
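        # All physical writes to the index and data files happen inside this
        # context manager: it opens the handles once, registers the current
        # file sizes with the transaction (for rollback), and writes the
        # docket back on successful exit.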
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

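    # A sketch of the (header, data) contract shared by compress() and
    # decompress(), with a hypothetical payload (``rl`` is any revlog):
    #
    #   header, packed = rl.compress(b'some revision text')
    #   assert rl.decompress(header + packed) == b'some revision text'
    #
    # An empty header means the packed bytes identify themselves (compression
    # header included, or a leading '\0'); b'u' marks uncompressed data that
    # needed an explicit marker.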
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

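        # The index entry tuple below, by position:
        #  0: offset in the data file, combined with the revision flags
        #  1: compressed (delta) length    2: uncompressed text length
        #  3: delta base revision          4: link revision
        #  5: first parent revision        6: second parent revision
        #  7: node id                      8: sidedata offset
        #  9: sidedata length             10: data compression mode
        # 11: sidedata compression mode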
        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty
2749
2761
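    # Illustrative sketch (not part of the original file): each item in the
    # ``deltas`` iterable consumed by addgroup() is an 8-tuple. A caller
    # applying changegroup data might look roughly like this, assuming ``rl``
    # is a revlog, ``cl`` the changelog, and ``tr`` an open transaction:
    #
    #   def linkmapper(linknode):
    #       return cl.rev(linknode)
    #
    #   deltas = [
    #       (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
    #   ]
    #   any_added = rl.addgroup(deltas, linkmapper, tr)
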
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

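    # Sketch of how the two predicates above differ: iscensored() reads the
    # flag bits already stored for a revision, while _peek_iscensored()
    # inspects an incoming delta before anything is written, which is how
    # addgroup() tags censored content on the fly:
    #
    #   if not flags and rl._peek_iscensored(baserev, delta):
    #       flags |= REVIDX_ISCENSORED
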
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

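    # Illustrative usage (``rl`` and ``minlink`` assumed): callers learn both
    # where to truncate and which revisions' linkrevs the strip would break:
    #
    #   striprev, brokenrevs = rl.getstrippoint(minlink)
    #   if brokenrevs:
    #       ...save those revisions so they can be re-added after strip()...
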
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

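    # A worked example of the truncation arithmetic above, with illustrative
    # numbers (e.g. entry_size == 64 and rev == 10): a non-inline revlog
    # truncates the data file at self.start(10) and the index at
    # 10 * 64 == 640 bytes; an inline revlog interleaves index entries and
    # data in a single file, hence the combined offset:
    #
    #   end = data_end + (rev * self.index.entry_size)
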
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

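    # Interpreting checksize() (sketch): (0, 0) means both files have exactly
    # the expected length; positive values report trailing bytes, e.g. left
    # over from an interrupted write:
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       ...the revlog is damaged; see `hg verify` / `hg recover`...
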
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

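    # Illustrative consumer (``rl``, ``nodes`` and ``send`` assumed):
    # emitrevisions() yields ``revlogrevisiondelta`` objects suitable for
    # changegroup-style streaming:
    #
    #   for rev_delta in rl.emitrevisions(nodes, revisiondata=True):
    #       payload = rev_delta.delta or rev_delta.revision
    #       send(rev_delta.node, rev_delta.basenode, payload)
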
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. If unspecified, the destination revlog's existing
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

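    # Example call (illustrative): force every delta to be recomputed while
    # copying into a freshly created destination, e.g. during a repo upgrade:
    #
    #   srcrevlog.clone(tr, destrevlog, deltareuse=revlog.DELTAREUSENEVER)
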
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

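    # Note on the flag merging used twice above: ``&`` binds tighter than
    # ``|`` in Python, so the expression adds the "to add" bits and clears
    # the "to remove" bits, i.e. it is equivalent to:
    #
    #   flags = flags | (new_flags[0] & ~new_flags[1])
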
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

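    # Illustrative usage (``rl`` is a censorable filelog revlog, ``tr`` an
    # open transaction, ``badnode`` the node to erase):
    #
    #   rl.censorrevision(tr, badnode, tombstone=b'removed for legal reasons')
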
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

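    # Illustrative consumer (``rl``, ``state`` and ``ui`` assumed): a verifier
    # drains the generator; each ``revlogproblem`` carries optional
    # ``warning``, ``error`` and ``node`` attributes:
    #
    #   for problem in rl.verifyintegrity(state):
    #       ui.warn((problem.warning or problem.error) + b'\n')
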
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

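    # Example (illustrative): request only the aggregate sizes and compare
    # on-disk footprint against the tracked (uncompressed) size:
    #
    #   info = rl.storageinfo(trackedsize=True, storedsize=True)
    #   ratio = info[b'storedsize'] / max(1, info[b'trackedsize'])
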
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
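
    # Shape of ``helpers`` above (sketch, per
    # `revlogutil.sidedata.get_sidedata_helpers`): a 3-tuple of (repository,
    # computers, removers), which is why the early return fires when both
    # helpers[1] and helpers[2] are empty:
    #
    #   helpers = get_sidedata_helpers(repo, remote_sd_categories)
    #   rl.rewrite_sidedata(tr, helpers, startrev, endrev)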