changelogv2: use a dedicated version number...
marmoute
r48040:921648d3 default

@@ -1,3442 +1,3445 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
+    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)

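# As a sketch of the packing above: offset_type(1024, 0) == 1024 << 16, and
# a packed field `f` splits back into (f >> 16, f & 0xFFFF), i.e. the byte
# offset in the high bits and the REVIDX_* flags in the low 16 bits.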

def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF
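# (for reference: 0x7FFFFFFF == 2**31 - 1, the largest value a 4-byte signed
# integer can hold)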


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries",
    tuple objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items
    (sometimes not having them at all), but the tuple will always be the same.
    New fields are usually added at the end to avoid breaking existing code
    that relies on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
        A flag field that carries special information or changes the behavior
        of the revision. (see `REVIDX_*` constants for details)
        The flag field only occupies the first 16 bits of this field,
        use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """

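    # A small sketch of the decoding described above, assuming `e` is one
    # such index entry tuple:
    #
    #   offset = e[0] >> 16    # byte position of the revision's data chunk
    #   flags = e[0] & 0xFFFF  # REVIDX_* storage flags
    #   comp_len, uncomp_len = e[1], e[2]
    #   base_rev, link_rev = e[3], e[4]
    #
    # (the `start()`, `flags()` and `length()` accessors below wrap exactly
    # these lookups)
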
    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but test, debug, or performance measurement code might not
        set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size for starting to use mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
-            new_header = REVLOGV2
+            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION
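        # For example, with `revlogv1` and `generaldelta` both set, the
        # header built above is REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
        # (0x00030001): the version number sits in the low 16 bits and the
        # feature flags in the high 16 bits.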

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
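        # e.g. the 0x00030001 header of an inline, general-delta version 1
        # revlog splits into _format_flags == 0x00030000 and
        # _format_version == REVLOGV1 here.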

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
+        elif self._format_version == CHANGELOGV2:
+            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

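    # A note on `t` above: revlog chunks begin with a one-byte marker, e.g.
    # b'x' for zlib-compressed data, while b'u' marks data stored
    # uncompressed; `forrevlogheader` looks up the compression engine
    # registered for a given marker.
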
713 @util.propertycache
716 @util.propertycache
714 def _compressor(self):
717 def _compressor(self):
715 engine = util.compengines[self._compengine]
718 engine = util.compengines[self._compengine]
716 return engine.revlogcompressor(self._compengineopts)
719 return engine.revlogcompressor(self._compengineopts)
717
720
718 @util.propertycache
721 @util.propertycache
719 def _decompressor(self):
722 def _decompressor(self):
720 """the default decompressor"""
723 """the default decompressor"""
721 if self._docket is None:
724 if self._docket is None:
722 return None
725 return None
723 t = self._docket.default_compression_header
726 t = self._docket.default_compression_header
724 c = self._get_decompressor(t)
727 c = self._get_decompressor(t)
725 return c.decompress
728 return c.decompress
726
729
727 def _indexfp(self):
730 def _indexfp(self):
728 """file object for the revlog's index file"""
731 """file object for the revlog's index file"""
729 return self.opener(self._indexfile, mode=b"r")
732 return self.opener(self._indexfile, mode=b"r")
730
733
731 def __index_write_fp(self):
734 def __index_write_fp(self):
732 # You should not use this directly and use `_writing` instead
735 # You should not use this directly and use `_writing` instead
733 try:
736 try:
734 f = self.opener(
737 f = self.opener(
735 self._indexfile, mode=b"r+", checkambig=self._checkambig
738 self._indexfile, mode=b"r+", checkambig=self._checkambig
736 )
739 )
737 if self._docket is None:
740 if self._docket is None:
738 f.seek(0, os.SEEK_END)
741 f.seek(0, os.SEEK_END)
739 else:
742 else:
740 f.seek(self._docket.index_end, os.SEEK_SET)
743 f.seek(self._docket.index_end, os.SEEK_SET)
741 return f
744 return f
742 except IOError as inst:
745 except IOError as inst:
743 if inst.errno != errno.ENOENT:
746 if inst.errno != errno.ENOENT:
744 raise
747 raise
745 return self.opener(
748 return self.opener(
746 self._indexfile, mode=b"w+", checkambig=self._checkambig
749 self._indexfile, mode=b"w+", checkambig=self._checkambig
747 )
750 )
748
751
749 def __index_new_fp(self):
752 def __index_new_fp(self):
750 # You should not use this unless you are upgrading from inline revlog
753 # You should not use this unless you are upgrading from inline revlog
751 return self.opener(
754 return self.opener(
752 self._indexfile,
755 self._indexfile,
753 mode=b"w",
756 mode=b"w",
754 checkambig=self._checkambig,
757 checkambig=self._checkambig,
755 atomictemp=True,
758 atomictemp=True,
756 )
759 )
757
760
758 def _datafp(self, mode=b'r'):
761 def _datafp(self, mode=b'r'):
759 """file object for the revlog's data file"""
762 """file object for the revlog's data file"""
760 return self.opener(self._datafile, mode=mode)
763 return self.opener(self._datafile, mode=mode)
761
764
762 @contextlib.contextmanager
765 @contextlib.contextmanager
763 def _datareadfp(self, existingfp=None):
766 def _datareadfp(self, existingfp=None):
764 """file object suitable to read data"""
767 """file object suitable to read data"""
765 # Use explicit file handle, if given.
768 # Use explicit file handle, if given.
766 if existingfp is not None:
769 if existingfp is not None:
767 yield existingfp
770 yield existingfp
768
771
769 # Use a file handle being actively used for writes, if available.
772 # Use a file handle being actively used for writes, if available.
770 # There is some danger to doing this because reads will seek the
773 # There is some danger to doing this because reads will seek the
771 # file. However, _writeentry() performs a SEEK_END before all writes,
774 # file. However, _writeentry() performs a SEEK_END before all writes,
772 # so we should be safe.
775 # so we should be safe.
773 elif self._writinghandles:
776 elif self._writinghandles:
774 if self._inline:
777 if self._inline:
775 yield self._writinghandles[0]
778 yield self._writinghandles[0]
776 else:
779 else:
777 yield self._writinghandles[1]
780 yield self._writinghandles[1]
778
781
779 # Otherwise open a new file handle.
782 # Otherwise open a new file handle.
780 else:
783 else:
781 if self._inline:
784 if self._inline:
782 func = self._indexfp
785 func = self._indexfp
783 else:
786 else:
784 func = self._datafp
787 func = self._datafp
785 with func() as fp:
788 with func() as fp:
786 yield fp
789 yield fp
787
790
788 def tiprev(self):
791 def tiprev(self):
789 return len(self.index) - 1
792 return len(self.index) - 1
790
793
791 def tip(self):
794 def tip(self):
792 return self.node(self.tiprev())
795 return self.node(self.tiprev())
793
796
794 def __contains__(self, rev):
797 def __contains__(self, rev):
795 return 0 <= rev < len(self)
798 return 0 <= rev < len(self)
796
799
797 def __len__(self):
800 def __len__(self):
798 return len(self.index)
801 return len(self.index)
799
802
800 def __iter__(self):
803 def __iter__(self):
801 return iter(pycompat.xrange(len(self)))
804 return iter(pycompat.xrange(len(self)))
802
805
803 def revs(self, start=0, stop=None):
806 def revs(self, start=0, stop=None):
804 """iterate over all rev in this revlog (from start to stop)"""
807 """iterate over all rev in this revlog (from start to stop)"""
805 return storageutil.iterrevs(len(self), start=start, stop=stop)
808 return storageutil.iterrevs(len(self), start=start, stop=stop)
806
809
807 @property
810 @property
808 def nodemap(self):
811 def nodemap(self):
809 msg = (
812 msg = (
810 b"revlog.nodemap is deprecated, "
813 b"revlog.nodemap is deprecated, "
811 b"use revlog.index.[has_node|rev|get_rev]"
814 b"use revlog.index.[has_node|rev|get_rev]"
812 )
815 )
813 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
816 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
814 return self.index.nodemap
817 return self.index.nodemap
815
818
816 @property
819 @property
817 def _nodecache(self):
820 def _nodecache(self):
818 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
821 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
819 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
822 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
820 return self.index.nodemap
823 return self.index.nodemap
821
824
822 def hasnode(self, node):
825 def hasnode(self, node):
823 try:
826 try:
824 self.rev(node)
827 self.rev(node)
825 return True
828 return True
826 except KeyError:
829 except KeyError:
827 return False
830 return False
828
831
829 def candelta(self, baserev, rev):
832 def candelta(self, baserev, rev):
830 """whether two revisions (baserev, rev) can be delta-ed or not"""
833 """whether two revisions (baserev, rev) can be delta-ed or not"""
831 # Disable delta if either rev requires a content-changing flag
834 # Disable delta if either rev requires a content-changing flag
832 # processor (ex. LFS). This is because such flag processor can alter
835 # processor (ex. LFS). This is because such flag processor can alter
833 # the rawtext content that the delta will be based on, and two clients
836 # the rawtext content that the delta will be based on, and two clients
834 # could have a same revlog node with different flags (i.e. different
837 # could have a same revlog node with different flags (i.e. different
835 # rawtext contents) and the delta could be incompatible.
838 # rawtext contents) and the delta could be incompatible.
836 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
839 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
837 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
840 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
838 ):
841 ):
839 return False
842 return False
840 return True
843 return True
841
844
842 def update_caches(self, transaction):
845 def update_caches(self, transaction):
843 if self._nodemap_file is not None:
846 if self._nodemap_file is not None:
844 if transaction is None:
847 if transaction is None:
845 nodemaputil.update_persistent_nodemap(self)
848 nodemaputil.update_persistent_nodemap(self)
846 else:
849 else:
847 nodemaputil.setup_persistent_nodemap(transaction, self)
850 nodemaputil.setup_persistent_nodemap(transaction, self)
848
851
849 def clearcaches(self):
852 def clearcaches(self):
850 self._revisioncache = None
853 self._revisioncache = None
851 self._chainbasecache.clear()
854 self._chainbasecache.clear()
852 self._chunkcache = (0, b'')
855 self._chunkcache = (0, b'')
853 self._pcache = {}
856 self._pcache = {}
854 self._nodemap_docket = None
857 self._nodemap_docket = None
855 self.index.clearcaches()
858 self.index.clearcaches()
856 # The python code is the one responsible for validating the docket, we
859 # The python code is the one responsible for validating the docket, we
857 # end up having to refresh it here.
860 # end up having to refresh it here.
858 use_nodemap = (
861 use_nodemap = (
859 not self._inline
862 not self._inline
860 and self._nodemap_file is not None
863 and self._nodemap_file is not None
861 and util.safehasattr(self.index, 'update_nodemap_data')
864 and util.safehasattr(self.index, 'update_nodemap_data')
862 )
865 )
863 if use_nodemap:
866 if use_nodemap:
864 nodemap_data = nodemaputil.persisted_data(self)
867 nodemap_data = nodemaputil.persisted_data(self)
865 if nodemap_data is not None:
868 if nodemap_data is not None:
866 self._nodemap_docket = nodemap_data[0]
869 self._nodemap_docket = nodemap_data[0]
867 self.index.update_nodemap_data(*nodemap_data)
870 self.index.update_nodemap_data(*nodemap_data)
868
871
869 def rev(self, node):
872 def rev(self, node):
870 try:
873 try:
871 return self.index.rev(node)
874 return self.index.rev(node)
872 except TypeError:
875 except TypeError:
873 raise
876 raise
874 except error.RevlogError:
877 except error.RevlogError:
875 # parsers.c radix tree lookup failed
878 # parsers.c radix tree lookup failed
876 if (
879 if (
877 node == self.nodeconstants.wdirid
880 node == self.nodeconstants.wdirid
878 or node in self.nodeconstants.wdirfilenodeids
881 or node in self.nodeconstants.wdirfilenodeids
879 ):
882 ):
880 raise error.WdirUnsupported
883 raise error.WdirUnsupported
881 raise error.LookupError(node, self.display_id, _(b'no node'))
884 raise error.LookupError(node, self.display_id, _(b'no node'))
882
885
883 # Accessors for index entries.
886 # Accessors for index entries.
884
887
885 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
888 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
886 # are flags.
889 # are flags.
887 def start(self, rev):
890 def start(self, rev):
888 return int(self.index[rev][0] >> 16)
891 return int(self.index[rev][0] >> 16)
889
892
890 def flags(self, rev):
893 def flags(self, rev):
891 return self.index[rev][0] & 0xFFFF
894 return self.index[rev][0] & 0xFFFF
892
895
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

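    # Illustration (not part of the original source): a minimal sketch of the
    # fast-path test in ``size`` above. XOR-ing REVIDX_ELLIPSIS out of the
    # known-flags mask leaves exactly the flags that may change content; if
    # none of them is set, the raw size can be trusted directly.
    @staticmethod
    def _example_rawsize_is_size(flags, known_flags, ellipsis_flag):
        content_changing = known_flags ^ ellipsis_flag
        return flags & content_changing == 0
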
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead; mirror parentrevs()
        # and put a null parent second (d[5] is a revision number, so it must
        # be compared against nullrev, not against a node id)
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

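    # Illustration (not part of the original source): a minimal standalone
    # sketch of what ``_chaininfo`` computes, over a toy index of
    # (compressed_length, delta_base) pairs where ``delta_base == rev`` marks
    # a full snapshot at the bottom of the chain.
    @staticmethod
    def _example_chaininfo(toy_index, rev):
        chainlen = 0
        compressed_total = 0
        while toy_index[rev][1] != rev:
            chainlen += 1
            compressed_total += toy_index[rev][0]
            rev = toy_index[rev][1]
        # the base text must be decompressed too, so count its length as well
        compressed_total += toy_index[rev][0]
        return chainlen, compressed_total
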
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

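    # Illustration (not part of the original source): a hedged sketch of the
    # two chain-walking modes in the pure-Python fallback above. With
    # generaldelta each entry records its delta parent explicitly; in the
    # legacy layout every delta implicitly applies against the previous rev.
    @staticmethod
    def _example_next_in_chain(rev, delta_base, generaldelta):
        if generaldelta:
            return delta_base  # follow the recorded delta parent
        return rev - 1  # legacy: delta against the immediately preceding rev
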
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

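    # Illustration (not part of the original source): a hedged usage sketch
    # of ``ancestors`` above; ``start_revs`` is a hypothetical list of
    # revision numbers known to exist in this revlog.
    def _example_collect_ancestors(self, start_revs):
        # yields ancestor revs in reverse revision order, per the docstring
        return [r for r in self.ancestors(start_revs, inclusive=True)]
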
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

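    # Illustration (not part of the original source): a hedged sketch of how
    # the incremental object above is consumed, mirroring ``findmissingrevs``
    # below; ``common_revs`` and ``head_revs`` are hypothetical lists of
    # revision numbers.
    def _example_incremental_missing(self, common_revs, head_revs):
        inc = self.incrementalmissingrevs(common=common_revs)
        # missingancestors() may be called repeatedly as heads are discovered
        return inc.missingancestors(head_revs)
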
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

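    # Illustration (not part of the original source): a hedged usage sketch
    # of ``nodesbetween`` above; ``root_node`` and ``head_node`` are
    # hypothetical binary node ids already present in this revlog.
    def _example_nodesbetween(self, root_node, head_node):
        nodes, outroots, outheads = self.nodesbetween(
            roots=[root_node], heads=[head_node]
        )
        return nodes  # topologically sorted path from the root to the head
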
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

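    # Illustration (not part of the original source): a minimal standalone
    # sketch of the head-marking pass in ``_headrevs`` above, over a toy
    # ``parentrevs`` callable mapping rev -> (p1, p2) with -1 for "no parent".
    @staticmethod
    def _example_headrevs(count, parentrevs):
        ishead = [True] * count
        for r in range(count):
            for p in parentrevs(r):
                if p != -1:
                    ishead[p] = False  # any rev with a child is not a head
        return [r for r in range(count) if ishead[r]]
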
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

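    # Illustration (not part of the original source): a minimal sketch of the
    # window rounding in ``_readsegment`` above, assuming ``cachesize`` is a
    # power of two. For offset=70000, length=100, cachesize=65536 this yields
    # the aligned span [65536, 131072), so nearby reads in either direction
    # can then be served from the cache.
    @staticmethod
    def _example_cache_window(offset, length, cachesize):
        realoffset = offset & ~(cachesize - 1)  # round down to a boundary
        realend = (offset + length + cachesize) & ~(cachesize - 1)  # round up
        return realoffset, realend - realoffset
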
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

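    # Illustration (not part of the original source): a minimal sketch of the
    # inline adjustment in ``_getsegmentforrevs`` above. In an inline revlog,
    # index entries and revision data are interleaved, so the data of rev
    # ``r`` is pushed back by the ``r + 1`` index entries written before it.
    @staticmethod
    def _example_inline_offset(logical_offset, rev, entry_size):
        return logical_offset + (rev + 1) * entry_size
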
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

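    # Editorial sketch (hypothetical helper, not part of the original
    # module): ``_chunks`` is a batched variant of ``_chunk``, so for any
    # ascending list of revisions the two should agree item for item. A
    # minimal predicate expressing that invariant, assuming ``revs`` is
    # already sorted:
    def _sketch_chunks_match_chunk(self, revs):
        # bytes() normalizes buffers returned by the batched path
        batched = [bytes(c) for c in self._chunks(revs)]
        single = [bytes(self._chunk(r)) for r in revs]
        return batched == single
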
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

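    # Editorial sketch (hypothetical helper, not in the original module):
    # walking a delta chain using only ``deltaparent``, stopping at a full
    # snapshot, whose delta parent is ``nullrev``. ``_deltachain`` (defined
    # earlier in this class) is the real, optimized form of this idea.
    def _sketch_walk_deltachain(self, rev):
        chain = []
        while rev != nullrev:
            chain.append(rev)
            rev = self.deltaparent(rev)
        chain.reverse()  # base (snapshot) first, requested rev last
        return chain
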
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

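    # Editorial sketch (hypothetical helper, not part of the original
    # module): collecting every snapshot revision with ``issnapshot``, e.g.
    # to inspect how many full or intermediate snapshots a sparse revlog
    # contains. Relies on the revlog being iterable over its revisions.
    def _sketch_all_snapshots(self):
        return [r for r in self if self.issnapshot(r)]
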
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

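    # Editorial sketch (hypothetical helper, not part of the original
    # module): a delta returned by ``revdiff`` applies to the *raw* text of
    # ``rev1`` and yields the raw text of ``rev2``, so this round trip
    # should hold for any pair of revisions.
    def _sketch_revdiff_roundtrip(self, rev1, rev2):
        delta = self.revdiff(rev1, rev2)
        patched = mdiff.patches(self.rawdata(rev1), [delta])
        return patched == self.rawdata(rev2)
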
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

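    # Editorial sketch (hypothetical helper, not part of the original
    # module): for a revision carrying no special flags, ``rawdata`` and
    # ``revision`` return the same bytes; they only differ when a flag
    # processor transforms the stored text on read.
    def _sketch_raw_vs_cooked(self, rev):
        if self.flags(rev) == REVIDX_DEFAULT_FLAGS:
            return self.rawdata(rev) == self.revision(rev)
        return None  # transformed by a flag processor; texts may differ
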
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

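    # Editorial sketch (hypothetical helper, not part of the original
    # module): the integrity check above boils down to recomputing
    # ``hash(text, p1, p2)`` and comparing it to the stored node, as in this
    # simplified predicate.
    def _sketch_node_matches(self, rev):
        node = self.node(rev)
        p1, p2 = self.parents(node)
        return node == self.hash(self.revision(node), p1, p2)
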
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a
        case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

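    # Editorial sketch (hypothetical helper, not part of the original
    # module): ``compress`` and ``decompress`` round-trip once the header
    # returned by ``compress`` is prepended to the payload, matching how
    # chunks are laid out on disk.
    def _sketch_compression_roundtrip(self, data):
        header, packed = self.compress(data)
        return bytes(self.decompress(header + packed)) == data
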
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare them uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)


    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty
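
    # Editor's illustration (not part of this changeset): each item of
    # ``deltas`` consumed above is an 8-tuple, so a hypothetical caller would
    # look like the following, where ``rl``, ``linkmapper`` and ``tr`` are
    # assumed names for a revlog, a linkrev mapping function and an open
    # transaction:
    #
    #     deltas = [
    #         (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
    #     ]
    #     added = rl.addgroup(deltas, linkmapper, tr)
    #     # ``added`` is True if at least one incoming revision was added or
    #     # was already present (duplicates also clear ``empty``)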

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )
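
    # Editor's illustration (not part of this changeset): a sketch of how
    # getstrippoint() pairs with strip() below; ``rl`` and ``minlink`` are
    # assumed names:
    #
    #     striprev, brokenrevs = rl.getstrippoint(minlink)
    #     # ``brokenrevs`` holds revisions whose linkrevs the strip will
    #     # break; the caller saves what must survive, then truncates:
    #     rl.strip(minlink, tr)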

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res
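
    # Editor's illustration (not part of this changeset): checksize() is a
    # cheap consistency probe; a healthy revlog reports no extra bytes:
    #
    #     dd, di = rl.checksize()  # ``rl`` is an assumed name
    #     assert (dd, di) == (0, 0)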

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When ``None``, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
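
    # Editor's illustration (not part of this changeset): forcing a full
    # delta recomputation while cloning could look like this, with ``src``,
    # ``dst`` and ``tr`` as assumed names for the source revlog, an empty
    # destination revlog and an open transaction:
    #
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)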

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
3131
3134
3132 def censorrevision(self, tr, censornode, tombstone=b''):
3135 def censorrevision(self, tr, censornode, tombstone=b''):
3133 if self._format_version == REVLOGV0:
3136 if self._format_version == REVLOGV0:
3134 raise error.RevlogError(
3137 raise error.RevlogError(
3135 _(b'cannot censor with version %d revlogs')
3138 _(b'cannot censor with version %d revlogs')
3136 % self._format_version
3139 % self._format_version
3137 )
3140 )
3138
3141
3139 censorrev = self.rev(censornode)
3142 censorrev = self.rev(censornode)
3140 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3143 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3141
3144
3142 if len(tombstone) > self.rawsize(censorrev):
3145 if len(tombstone) > self.rawsize(censorrev):
3143 raise error.Abort(
3146 raise error.Abort(
3144 _(b'censor tombstone must be no longer than censored data')
3147 _(b'censor tombstone must be no longer than censored data')
3145 )
3148 )
3146
3149
3147 # Rewriting the revlog in place is hard. Our strategy for censoring is
3150 # Rewriting the revlog in place is hard. Our strategy for censoring is
3148 # to create a new revlog, copy all revisions to it, then replace the
3151 # to create a new revlog, copy all revisions to it, then replace the
3149 # revlogs on transaction close.
3152 # revlogs on transaction close.
3150 #
3153 #
3151 # This is a bit dangerous. We could easily have a mismatch of state.
3154 # This is a bit dangerous. We could easily have a mismatch of state.
3152 newrl = revlog(
3155 newrl = revlog(
3153 self.opener,
3156 self.opener,
3154 target=self.target,
3157 target=self.target,
3155 radix=self.radix,
3158 radix=self.radix,
3156 postfix=b'tmpcensored',
3159 postfix=b'tmpcensored',
3157 censorable=True,
3160 censorable=True,
3158 )
3161 )
3159 newrl._format_version = self._format_version
3162 newrl._format_version = self._format_version
3160 newrl._format_flags = self._format_flags
3163 newrl._format_flags = self._format_flags
3161 newrl._generaldelta = self._generaldelta
3164 newrl._generaldelta = self._generaldelta
3162 newrl._parse_index = self._parse_index
3165 newrl._parse_index = self._parse_index
3163
3166
3164 for rev in self.revs():
3167 for rev in self.revs():
3165 node = self.node(rev)
3168 node = self.node(rev)
3166 p1, p2 = self.parents(node)
3169 p1, p2 = self.parents(node)
3167
3170
3168 if rev == censorrev:
3171 if rev == censorrev:
3169 newrl.addrawrevision(
3172 newrl.addrawrevision(
3170 tombstone,
3173 tombstone,
3171 tr,
3174 tr,
3172 self.linkrev(censorrev),
3175 self.linkrev(censorrev),
3173 p1,
3176 p1,
3174 p2,
3177 p2,
3175 censornode,
3178 censornode,
3176 REVIDX_ISCENSORED,
3179 REVIDX_ISCENSORED,
3177 )
3180 )
3178
3181
3179 if newrl.deltaparent(rev) != nullrev:
3182 if newrl.deltaparent(rev) != nullrev:
3180 raise error.Abort(
3183 raise error.Abort(
3181 _(
3184 _(
3182 b'censored revision stored as delta; '
3185 b'censored revision stored as delta; '
3183 b'cannot censor'
3186 b'cannot censor'
3184 ),
3187 ),
3185 hint=_(
3188 hint=_(
3186 b'censoring of revlogs is not '
3189 b'censoring of revlogs is not '
3187 b'fully implemented; please report '
3190 b'fully implemented; please report '
3188 b'this bug'
3191 b'this bug'
3189 ),
3192 ),
3190 )
3193 )
3191 continue
3194 continue
3192
3195
3193 if self.iscensored(rev):
3196 if self.iscensored(rev):
3194 if self.deltaparent(rev) != nullrev:
3197 if self.deltaparent(rev) != nullrev:
3195 raise error.Abort(
3198 raise error.Abort(
3196 _(
3199 _(
3197 b'cannot censor due to censored '
3200 b'cannot censor due to censored '
3198 b'revision having delta stored'
3201 b'revision having delta stored'
3199 )
3202 )
3200 )
3203 )
3201 rawtext = self._chunk(rev)
3204 rawtext = self._chunk(rev)
3202 else:
3205 else:
3203 rawtext = self.rawdata(rev)
3206 rawtext = self.rawdata(rev)
3204
3207
3205 newrl.addrawrevision(
3208 newrl.addrawrevision(
3206 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3209 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3207 )
3210 )
3208
3211
3209 tr.addbackup(self._indexfile, location=b'store')
3212 tr.addbackup(self._indexfile, location=b'store')
3210 if not self._inline:
3213 if not self._inline:
3211 tr.addbackup(self._datafile, location=b'store')
3214 tr.addbackup(self._datafile, location=b'store')
3212
3215
3213 self.opener.rename(newrl._indexfile, self._indexfile)
3216 self.opener.rename(newrl._indexfile, self._indexfile)
3214 if not self._inline:
3217 if not self._inline:
3215 self.opener.rename(newrl._datafile, self._datafile)
3218 self.opener.rename(newrl._datafile, self._datafile)
3216
3219
3217 self.clearcaches()
3220 self.clearcaches()
3218 self._loadindex()
3221 self._loadindex()
3219
3222

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # ------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
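
    # Editor's illustration (not part of this changeset): consuming the
    # generator, with ``rl`` and ``state`` as assumed names (``state`` being
    # the verifier-provided dict described above):
    #
    #     for problem in rl.verifyintegrity(state):
    #         if problem.error:
    #             ...  # hard error, e.g. a size mismatch
    #         elif problem.warning:
    #             ...  # e.g. an unexpected revlog format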

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)

@@ -1,179 +1,190 @@

# revlogdeltas.py - constants used for revlog logic.
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""

from __future__ import absolute_import

import struct

from ..interfaces import repository

### Internal utility constants

KIND_CHANGELOG = 1001  # over 256 to not be comparable with a bytes
KIND_MANIFESTLOG = 1002
KIND_FILELOG = 1003
KIND_OTHER = 1004

ALL_KINDS = {
    KIND_CHANGELOG,
    KIND_MANIFESTLOG,
    KIND_FILELOG,
    KIND_OTHER,
}

### main revlog header

INDEX_HEADER = struct.Struct(b">I")

## revlog version
REVLOGV0 = 0
REVLOGV1 = 1
# Dummy value until file format is finalized.
REVLOGV2 = 0xDEAD
# Dummy value until file format is finalized.
CHANGELOGV2 = 0xD34D

## global revlog header flags
# Shared across v1 and v2.
FLAG_INLINE_DATA = 1 << 16
# Only used by v1, implied by v2.
FLAG_GENERALDELTA = 1 << 17
REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
REVLOG_DEFAULT_FORMAT = REVLOGV1
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGV0_FLAGS = 0
REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
REVLOGV2_FLAGS = FLAG_INLINE_DATA
CHANGELOGV2_FLAGS = 0
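
# Editor's illustration (not part of this changeset): the version number and
# the feature flags share a single 4-byte header word, so for the default v1
# configuration:
#
#     REVLOG_DEFAULT_VERSION == REVLOGV1 | FLAG_INLINE_DATA == 0x10001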

### individual entry

## index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")

## index v1
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
assert INDEX_ENTRY_V1.size == 32 * 2
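
# Editor's illustration (not part of this changeset): the leading ">Q" packs
# the 6-byte offset and the 2-byte flags into one field, which readers split
# like this (the same ``& 0xFFFF`` mask appears in revlog.py above):
#
#     offset_flags = INDEX_ENTRY_V1.unpack(raw_entry)[0]
#     offset = offset_flags >> 16
#     flags = offset_flags & 0xFFFF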

#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
#  8 bytes: sidedata offset
#  4 bytes: sidedata compressed length
#  1 byte:  compression mode (the 2 lower bits are data_compression_mode)
# 19 bytes: padding to align to 96 bytes (see RevlogV2Plan wiki page)
INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size

# revlog index flags

# For historical reasons, revlog's internal flags were exposed via the
# wire protocol and are even exposed in parts of the storage APIs.

# revision has censor metadata, must be verified
REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
# revision hash does not match data (narrowhg)
REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
# revision data is stored externally
REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
# revision changes files in a way that could affect copy tracing.
REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_HASCOPIESINFO,
]

# bitmask for flags that could cause rawdata content change
REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED

## chunk compression mode constants:
# These constants are used in revlog version >=2 to denote the compression used
# for a chunk.

# Chunks use no compression; the data stored on disk can be used directly as
# the chunk value, without any header information prefixed.
COMP_MODE_PLAIN = 0

# Chunks use the "default compression" for the revlog (usually defined in the
# revlog docket). A header is still used.
#
# XXX: keeping a header is probably not useful and we should probably drop it.
#
# XXX: the value of allowing mixed types of compression in the revlog is
# unclear and we should consider making PLAIN/DEFAULT the only available
# modes for revlog v2, disallowing INLINE mode.
COMP_MODE_DEFAULT = 1

# Chunks use a compression mode stored "inline" at the start of the chunk
# itself. This is the mode always used for revlog versions "0" and "1".
COMP_MODE_INLINE = 2
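
# Editor's illustration (not part of this changeset): a decoder dispatching
# on these modes could be sketched as follows; ``default_decompressor`` and
# ``decompress`` stand in for the revlog's actual helpers:
#
#     if comp_mode == COMP_MODE_PLAIN:
#         text = chunk  # stored verbatim, no header to strip
#     elif comp_mode == COMP_MODE_DEFAULT:
#         text = default_decompressor(chunk)  # engine named by the docket
#     else:  # COMP_MODE_INLINE
#         text = decompress(chunk)  # first byte identifies the engine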

SUPPORTED_FLAGS = {
    REVLOGV0: REVLOGV0_FLAGS,
    REVLOGV1: REVLOGV1_FLAGS,
    REVLOGV2: REVLOGV2_FLAGS,
    CHANGELOGV2: CHANGELOGV2_FLAGS,
}

_no = lambda flags: False
_yes = lambda flags: True


def _from_flag(flag):
    return lambda flags: bool(flags & flag)


FEATURES_BY_VERSION = {
    REVLOGV0: {
        b'inline': _no,
        b'generaldelta': _no,
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV1: {
        b'inline': _from_flag(FLAG_INLINE_DATA),
        b'generaldelta': _from_flag(FLAG_GENERALDELTA),
        b'sidedata': False,
        b'docket': False,
    },
    REVLOGV2: {
        # The point of inline-revlog is to reduce the number of files used in
        # the store. Using a docket defeats this purpose. So we need other
        # means to reduce the number of files for revlogv2.
        b'inline': _no,
        b'generaldelta': _yes,
        b'sidedata': True,
        b'docket': True,
    },
    CHANGELOGV2: {
        b'inline': _no,
        # General delta is useless for changelog since we don't do any delta
        b'generaldelta': _no,
        b'sidedata': True,
        b'docket': True,
    },
}
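
# Editor's illustration (not part of this changeset): ``inline`` and
# ``generaldelta`` map to predicates over the header flags, while
# ``sidedata`` and ``docket`` are plain booleans:
#
#     features = FEATURES_BY_VERSION[REVLOGV1]
#     is_inline = features[b'inline'](header_flags)  # header_flags assumed
#     has_docket = features[b'docket']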


SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000

@@ -1,179 +1,180 @@

# docket - code related to revlog "docket"
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

### Revlog docket file
#
# The revlog is stored on disk using multiple files:
#
# * a small docket file, containing metadata and a pointer,
#
# * an index file, containing fixed width information about revisions,
#
# * a data file, containing variable width data for these revisions.

from __future__ import absolute_import

import struct

from .. import (
    error,
    util,
)

from . import (
    constants,
)

# Docket format
#
# * 4 bytes: revlog version
# |   This is mandatory, as the docket must stay compatible with the previous
# |   revlog index header.
# * 8 bytes: size of index-data
# * 8 bytes: pending size of index-data
# * 8 bytes: size of data
# * 8 bytes: pending size of data
# * 1 byte: default compression header
S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc')
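
# Aside for this excerpt (not part of the changeset): assuming
# constants.INDEX_HEADER is struct.Struct(">I"), S_HEADER's combined format is
# ">ILLLLc". With standard sizes, 'L' packs to 4 bytes, so the size fields
# occupy 4 bytes each on disk even though the comment above sketches them as
# 8-byte quantities.
assert struct.calcsize('>ILLLLc') == 4 + 4 * 4 + 1  # 21 bytes total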


class RevlogDocket(object):
    """metadata associated with a revlog"""

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
        default_compression_header=None,
    ):
        self._version_header = version_header
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        # these asserts should be True as long as we have a single index filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
        self.default_compression_header = default_compression_header

    def index_filepath(self):
        """file path to the current index file associated with this docket"""
        # very simplistic version at first
        return b"%s.idx" % self._radix

    @property
    def index_end(self):
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        if new_size != self._index_end:
            self._index_end = new_size
            self._dirty = True

    @property
    def data_end(self):
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        if new_size != self._data_end:
            self._data_end = new_size
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modifications to disk, if any

        This makes the new content visible to all processes."""
        if not self._dirty:
            return False
        else:
            if self._read_only:
                msg = b'writing read-only docket: %s'
                msg %= self._path
                raise error.ProgrammingError(msg)
            if not stripping:
                # XXX we could leverage the docket while stripping. However,
                # it is not powerful enough at the time of this comment.
                transaction.addbackup(self._path, location=b'store')
            with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                f.write(self._serialize(pending=pending))
            # if pending, we still need to write the final data eventually
            self._dirty = pending
            return True

    def _serialize(self, pending=False):
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end

        # this assert should be True as long as we have a single index filename
        assert official_data_end <= self._data_end
        data = (
            self._version_header,
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
            self.default_compression_header,
        )
        return S_HEADER.pack(*data)
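

# Hedged usage sketch (editorial, not from the changeset): exercising the
# two-phase "pending" write above. _RecordingOpener, _NullTransaction and
# _StubRevlog are invented stand-ins for Mercurial's vfs, transaction and
# revlog objects; none of this scaffolding is Mercurial API.
import contextlib
import io


class _RecordingOpener(object):
    """collect written bytes instead of touching the filesystem"""

    def __init__(self):
        self.writes = []

    @contextlib.contextmanager
    def __call__(self, path, mode=b'w', atomictemp=True):
        buf = io.BytesIO()
        yield buf
        self.writes.append((path, buf.getvalue()))


class _NullTransaction(object):
    def addbackup(self, path, location=None):
        pass


class _StubRevlog(object):
    radix = b'testlog'
    _docket_file = b'testlog.dkt'  # hypothetical docket file name
    opener = _RecordingOpener()


_docket = RevlogDocket(
    _StubRevlog(),
    version_header=constants.REVLOGV2,
    default_compression_header=b'u',
)
_docket.index_end = 64  # marks the docket dirty
_docket.write(_NullTransaction(), pending=True)  # publishes the *old* sizes
assert _docket._dirty  # the final write is still owed
_docket.write(_NullTransaction())  # publishes the new sizes
assert not _docket._dirty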


def default_docket(revlog, version_header):
    """given a revlog version header, return a new docket object for the
    given revlog"""
-    if (version_header & 0xFFFF) != constants.REVLOGV2:
+    rl_version = version_header & 0xFFFF
+    if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
        return None
    comp = util.compengines[revlog._compengine].revlogheader()
    docket = RevlogDocket(
        revlog,
        version_header=version_header,
        default_compression_header=comp,
    )
    docket._dirty = True
    return docket


def parse_docket(revlog, data, use_pending=False):
    """given some docket data, return a docket object for the given revlog"""
    header = S_HEADER.unpack(data[: S_HEADER.size])
    version_header = header[0]
    index_size = header[1]
    pending_index_size = header[2]
    data_size = header[3]
    pending_data_size = header[4]
    default_compression_header = header[5]
    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
        default_compression_header=default_compression_header,
    )
    return docket
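
Finally, a hedged round-trip sketch for the new default_docket gate and the parse_docket path, reusing the _StubRevlog stand-in from the aside above (again editorial scaffolding, not Mercurial API; only the low 16 bits of the version header are interpreted as the revlog version):

# only REVLOGV2/CHANGELOGV2 revlogs get a docket after this change
assert default_docket(_StubRevlog(), constants.REVLOGV1) is None

_rt = RevlogDocket(
    _StubRevlog(),
    version_header=constants.CHANGELOGV2,
    index_end=64,
    pending_index_end=64,
    data_end=128,
    pending_data_end=128,
    default_compression_header=b'u',
)
_parsed = parse_docket(_StubRevlog(), _rt._serialize())
assert _parsed.index_end == 64
assert _parsed.data_end == 128
assert _parsed.default_compression_header == b'u'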