revlogv2: use a unique filename for data

Author: marmoute
Changeset: r48105:5035d31e (default, draft)
File: mercurial/revlog.py
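In short: when a docket is present (revlogv2), the data filename is now taken
from the docket instead of being derived from the radix alone, so the data
file's name can be made unique. The heart of the change, excerpted from the
hunk below:

    if self._docket is not None:
        self._datafile = self._docket.data_filepath()
    elif self.postfix is None:
        self._datafile = b'%s.d' % self.radix
    else:
        self._datafile = b'%s.d.%s' % (self.radix, self.postfix)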
@@ -1,3466 +1,3468 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
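
# Worked example (an editorial illustration, not from the upstream file):
# offset_type(0x20, 0) returns 0x20 << 16 == 0x200000; "field >> 16" recovers
# the byte offset and "field & 0xFFFF" the 16 flag bits.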


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries", tuple
    objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items (sometimes
    not having them at all), but the tuple will always be the same. New fields
    are usually added at the end to avoid breaking existing code that relies
    on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
            A flag field that carries special information or changes the behavior
            of the revision. (see `REVIDX_*` constants for details)
            The flag field only occupies the first 16 bits of this field,
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """
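
    # Worked example (an editorial illustration, not from the upstream file):
    # given e = self.index[r], "e[0] >> 16" is the data offset, "e[0] & 0xFFFF"
    # the storage flags, e[1] and e[2] the compressed/uncompressed lengths,
    # and e[7] the node id, matching the field list above.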

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance-measurement code might
        not set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

-        if self.postfix is None:
+        if self._docket is not None:
+            self._datafile = self._docket.data_filepath()
+        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))
931
933
932 def chainbase(self, rev):
934 def chainbase(self, rev):
933 base = self._chainbasecache.get(rev)
935 base = self._chainbasecache.get(rev)
934 if base is not None:
936 if base is not None:
935 return base
937 return base
936
938
937 index = self.index
939 index = self.index
938 iterrev = rev
940 iterrev = rev
939 base = index[iterrev][3]
941 base = index[iterrev][3]
940 while base != iterrev:
942 while base != iterrev:
941 iterrev = base
943 iterrev = base
942 base = index[iterrev][3]
944 base = index[iterrev][3]
943
945
944 self._chainbasecache[rev] = base
946 self._chainbasecache[rev] = base
945 return base
947 return base
946
948
947 def linkrev(self, rev):
949 def linkrev(self, rev):
948 return self.index[rev][4]
950 return self.index[rev][4]
949
951
950 def parentrevs(self, rev):
952 def parentrevs(self, rev):
951 try:
953 try:
952 entry = self.index[rev]
954 entry = self.index[rev]
953 except IndexError:
955 except IndexError:
954 if rev == wdirrev:
956 if rev == wdirrev:
955 raise error.WdirUnsupported
957 raise error.WdirUnsupported
956 raise
958 raise
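        # If the first stored parent is nullrev, swap the pair so that the
        # non-null parent (if any) is always reported first.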
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
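    # A sketch of the semantics with a hypothetical general-delta revlog:
    # if rev 7 is stored as a delta against rev 4, and rev 4 against the
    # full snapshot at rev 2, then _deltachain(7) returns ([2, 4, 7], False)
    # while _deltachain(7, stoprev=4) returns ([7], True).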

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
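        # lazyset below lets us test membership against the lazily computed
        # ancestor set without materializing it up front, while still letting
        # us add nullrev and the common revs themselves explicitly.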
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
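        # The list is one slot longer than the revlog so that writes through
        # a nullrev (-1) parent land harmlessly in the extra final slot.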
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
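        # Revision numbers are assigned in topological order, so an ancestor
        # always has a smaller revision number than its descendants; a > b
        # therefore rules out the ancestor relationship immediately.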
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
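        # Worked example, assuming a 64 KiB cache size (the mask math
        # requires a power of two): offset=70000, length=100 gives
        # realoffset=65536 and reallength=65536, i.e. the read is widened to
        # cache-aligned boundaries on both sides.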
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
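            # Inline revlogs interleave each revision's index entry with its
            # data in a single file, so the physical offset of rev N's data
            # is shifted by the (N + 1) index entries written before it.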
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
1810 if compression_mode == COMP_MODE_PLAIN:
1812 if compression_mode == COMP_MODE_PLAIN:
1811 return data
1813 return data
1812 elif compression_mode == COMP_MODE_DEFAULT:
1814 elif compression_mode == COMP_MODE_DEFAULT:
1813 return self._decompressor(data)
1815 return self._decompressor(data)
1814 elif compression_mode == COMP_MODE_INLINE:
1816 elif compression_mode == COMP_MODE_INLINE:
1815 return self.decompress(data)
1817 return self.decompress(data)
1816 else:
1818 else:
1817 msg = 'unknown compression mode %d'
1819 msg = 'unknown compression mode %d'
1818 msg %= compression_mode
1820 msg %= compression_mode
1819 raise error.RevlogError(msg)
1821 raise error.RevlogError(msg)
1820
1822
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

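# A minimal standalone sketch (not part of revlog) of the batched read that
# _chunks() performs above: fetch one contiguous segment covering several
# revisions, then slice each revision's chunk out of it by relative offset
# instead of issuing one read per revision. The names are illustrative.

def slice_chunks(segment, segment_offset, positions):
    """positions is an ascending list of (absolute_start, length) pairs."""
    chunks = []
    for abs_start, length in positions:
        rel = abs_start - segment_offset
        chunks.append(segment[rel : rel + length])
    return chunks


# three 4-byte chunks packed back to back, starting at file offset 100:
segment = b'aaaabbbbcccc'
assert slice_chunks(segment, 100, [(100, 4), (104, 4), (108, 4)]) == [
    b'aaaa',
    b'bbbb',
    b'cccc',
]
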
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

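# A minimal standalone sketch (not part of revlog) contrasting the two delta
# layouts handled by deltaparent() above: without general delta, a revision
# implicitly deltas against the previous revision; with general delta, the
# stored base field names an arbitrary earlier revision, and a full snapshot
# stores its own number. NULLREV mirrors nullrev.

NULLREV = -1


def delta_parent(rev, stored_base, generaldelta):
    if stored_base == rev:
        return NULLREV  # full snapshot: delta against the empty text
    if generaldelta:
        return stored_base
    return rev - 1


assert delta_parent(5, 5, True) == NULLREV
assert delta_parent(5, 2, True) == 2
assert delta_parent(5, 2, False) == 4
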
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

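# A minimal standalone sketch (not part of revlog) of the recursive test in
# issnapshot() above, on a toy index mapping rev -> (deltabase, p1, p2): a
# revision is a snapshot when it deltas against nothing or itself, or
# (transitively) against another snapshot that is not one of its parents.

NULLREV = -1


def is_snapshot(index, rev):
    if rev == NULLREV:
        return True
    base, p1, p2 = index[rev]
    if base == rev or base == NULLREV:
        return True
    if base == p1 or base == p2:
        return False  # a plain delta against a parent
    return is_snapshot(index, base)


toy = {
    0: (0, NULLREV, NULLREV),  # full snapshot
    1: (0, 0, NULLREV),        # delta against its parent: not a snapshot
    2: (0, 1, NULLREV),        # intermediate snapshot on top of rev 0
}
assert is_snapshot(toy, 0) and not is_snapshot(toy, 1) and is_snapshot(toy, 2)
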
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually altering its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

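# A minimal standalone sketch (not part of revlog) of folding a delta chain
# into a full text, as mdiff.patches() does above. It assumes the binary
# delta format used elsewhere in this file: a sequence of hunks, each a
# ">lll" header (start, end, length of new data) followed by the new data,
# where start/end index into the text being patched.

import struct


def apply_one(text, delta):
    out, pos, i = [], 0, 0
    while i < len(delta):
        start, end, datalen = struct.unpack(b'>lll', delta[i : i + 12])
        i += 12
        out.append(text[pos:start])  # unchanged prefix
        out.append(delta[i : i + datalen])  # replacement data
        i += datalen
        pos = end
    out.append(text[pos:])  # unchanged suffix
    return b''.join(out)


def fold_chain(base, deltas):
    for d in deltas:
        base = apply_one(base, d)
    return base


# replace bytes 0..5 of the base with b'goodbye':
delta = struct.pack(b'>lll', 0, 5, 7) + b'goodbye'
assert fold_chain(b'hello world', [delta]) == b'goodbye world'
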
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

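# A minimal standalone sketch (not part of revlog) of the default node hash
# computed by storageutil.hashrevisionsha1 above: SHA-1 over the two parent
# nodes in sorted order followed by the revision text, so identical content
# with different history still gets a distinct node.

import hashlib


def node_hash(text, p1, p2):
    s = hashlib.sha1()
    s.update(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()


nullid = b'\0' * 20
# a root revision hashes its text after two null parents:
assert node_hash(b'data', nullid, nullid) == hashlib.sha1(
    nullid * 2 + b'data'
).digest()
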
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

                tr.replace(self._indexfile, trindex * self.index.entry_size)
                nodemaputil.setup_persistent_nodemap(tr, self)
                self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

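# A minimal standalone sketch (not part of revlog) of the threshold test at
# the top of _enforceinlinesize() above: an inline revlog stays inline until
# the data interleaved into the .i file outgrows a fixed cap (the real limit
# is the module-level _maxinline; the value below is only an assumption).

MAXINLINE = 131072  # assumed cap, in bytes


def should_split(tip_start, tip_length, inline):
    """Return True when an inline revlog must move its data to a .d file."""
    return inline and (tip_start + tip_length) >= MAXINLINE


assert not should_split(1000, 200, True)
assert should_split(131000, 1000, True)
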
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'trying to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

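# A minimal standalone sketch (not part of revlog) of the chunk header
# convention that compress()/decompress() above dispatch on: zlib output
# naturally starts with b'x', so it needs no extra header; data kept verbatim
# is tagged b'u' unless it already starts with the self-marking b'\0'.

import zlib


def toy_compress(data):
    comp = zlib.compress(data)
    if len(comp) < len(data):
        return comp  # b'x...' zlib stream, header is implicit
    if data[0:1] == b'\0':
        return data  # already unambiguous
    return b'u' + data  # tag as uncompressed


def toy_decompress(chunk):
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown chunk header: %r' % t)


payload = b'revlog data ' * 20
assert toy_decompress(toy_compress(payload)) == payload
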
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

2696 def addgroup(
2698 def addgroup(
2697 self,
2699 self,
2698 deltas,
2700 deltas,
2699 linkmapper,
2701 linkmapper,
2700 transaction,
2702 transaction,
2701 alwayscache=False,
2703 alwayscache=False,
2702 addrevisioncb=None,
2704 addrevisioncb=None,
2703 duplicaterevisioncb=None,
2705 duplicaterevisioncb=None,
2704 ):
2706 ):
2705 """
2707 """
2706 add a delta group
2708 add a delta group
2707
2709
2708 given a set of deltas, add them to the revision log. The
2710 given a set of deltas, add them to the revision log. The
2709 first delta is against its parent, which should be in our
2711 first delta is against its parent, which should be in our
2710 log; the rest are against the previous delta.
2712 log; the rest are against the previous delta.
2711
2713
2712 If ``addrevisioncb`` is defined, it will be called with arguments of
2714 If ``addrevisioncb`` is defined, it will be called with arguments of
2713 this revlog and the revision number that was added.
2715 this revlog and the revision number that was added.
2714 """
2716 """
2715
2717
2716 if self._adding_group:
2718 if self._adding_group:
2717 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2719 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2718
2720
2719 self._adding_group = True
2721 self._adding_group = True
2720 empty = True
2722 empty = True
2721 try:
2723 try:
2722 with self._writing(transaction):
2724 with self._writing(transaction):
2723 deltacomputer = deltautil.deltacomputer(self)
2725 deltacomputer = deltautil.deltacomputer(self)
2724 # loop through our set of deltas
2726 # loop through our set of deltas
2725 for data in deltas:
2727 for data in deltas:
2726 (
2728 (
2727 node,
2729 node,
2728 p1,
2730 p1,
2729 p2,
2731 p2,
2730 linknode,
2732 linknode,
2731 deltabase,
2733 deltabase,
2732 delta,
2734 delta,
2733 flags,
2735 flags,
2734 sidedata,
2736 sidedata,
2735 ) = data
2737 ) = data
2736 link = linkmapper(linknode)
2738 link = linkmapper(linknode)
2737 flags = flags or REVIDX_DEFAULT_FLAGS
2739 flags = flags or REVIDX_DEFAULT_FLAGS
2738
2740
2739 rev = self.index.get_rev(node)
2741 rev = self.index.get_rev(node)
2740 if rev is not None:
2742 if rev is not None:
2741 # this can happen if two branches make the same change
2743 # this can happen if two branches make the same change
2742 self._nodeduplicatecallback(transaction, rev)
2744 self._nodeduplicatecallback(transaction, rev)
2743 if duplicaterevisioncb:
2745 if duplicaterevisioncb:
2744 duplicaterevisioncb(self, rev)
2746 duplicaterevisioncb(self, rev)
2745 empty = False
2747 empty = False
2746 continue
2748 continue
2747
2749
2748 for p in (p1, p2):
2750 for p in (p1, p2):
2749 if not self.index.has_node(p):
2751 if not self.index.has_node(p):
2750 raise error.LookupError(
2752 raise error.LookupError(
2751 p, self.radix, _(b'unknown parent')
2753 p, self.radix, _(b'unknown parent')
2752 )
2754 )
2753
2755
2754 if not self.index.has_node(deltabase):
2756 if not self.index.has_node(deltabase):
2755 raise error.LookupError(
2757 raise error.LookupError(
2756 deltabase, self.display_id, _(b'unknown delta base')
2758 deltabase, self.display_id, _(b'unknown delta base')
2757 )
2759 )
2758
2760
2759 baserev = self.rev(deltabase)
2761 baserev = self.rev(deltabase)
2760
2762
2761 if baserev != nullrev and self.iscensored(baserev):
2763 if baserev != nullrev and self.iscensored(baserev):
2762 # if base is censored, delta must be full replacement in a
2764 # if base is censored, delta must be full replacement in a
2763 # single patch operation
2765 # single patch operation
2764 hlen = struct.calcsize(b">lll")
2766 hlen = struct.calcsize(b">lll")
2765 oldlen = self.rawsize(baserev)
2767 oldlen = self.rawsize(baserev)
2766 newlen = len(delta) - hlen
2768 newlen = len(delta) - hlen
2767 if delta[:hlen] != mdiff.replacediffheader(
2769 if delta[:hlen] != mdiff.replacediffheader(
2768 oldlen, newlen
2770 oldlen, newlen
2769 ):
2771 ):
2770 raise error.CensoredBaseError(
2772 raise error.CensoredBaseError(
2771 self.display_id, self.node(baserev)
2773 self.display_id, self.node(baserev)
2772 )
2774 )
2773
2775
2774 if not flags and self._peek_iscensored(baserev, delta):
2776 if not flags and self._peek_iscensored(baserev, delta):
2775 flags |= REVIDX_ISCENSORED
2777 flags |= REVIDX_ISCENSORED
2776
2778
2777 # We assume consumers of addrevisioncb will want to retrieve
2779 # We assume consumers of addrevisioncb will want to retrieve
2778 # the added revision, which will require a call to
2780 # the added revision, which will require a call to
2779 # revision(). revision() will fast path if there is a cache
2781 # revision(). revision() will fast path if there is a cache
2780 # hit. So, we tell _addrevision() to always cache in this case.
2782 # hit. So, we tell _addrevision() to always cache in this case.
2781 # We're only using addgroup() in the context of changegroup
2783 # We're only using addgroup() in the context of changegroup
2782 # generation so the revision data can always be handled as raw
2784 # generation so the revision data can always be handled as raw
2783 # by the flagprocessor.
2785 # by the flagprocessor.
2784 rev = self._addrevision(
2786 rev = self._addrevision(
2785 node,
2787 node,
2786 None,
2788 None,
2787 transaction,
2789 transaction,
2788 link,
2790 link,
2789 p1,
2791 p1,
2790 p2,
2792 p2,
2791 flags,
2793 flags,
2792 (baserev, delta),
2794 (baserev, delta),
2793 alwayscache=alwayscache,
2795 alwayscache=alwayscache,
2794 deltacomputer=deltacomputer,
2796 deltacomputer=deltacomputer,
2795 sidedata=sidedata,
2797 sidedata=sidedata,
2796 )
2798 )
2797
2799
2798 if addrevisioncb:
2800 if addrevisioncb:
2799 addrevisioncb(self, rev)
2801 addrevisioncb(self, rev)
2800 empty = False
2802 empty = False
2801 finally:
2803 finally:
2802 self._adding_group = False
2804 self._adding_group = False
2803 return not empty
2805 return not empty
2804
2806
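Each element of ``deltas`` unpacks to the 8-tuple shown above. A hedged toy model of the consumption loop, with a plain set standing in for the index, showing the duplicate-skip and unknown-parent/base checks (names are illustrative, not revlog APIs):

    def apply_group(deltas, known_nodes):
        """Toy model of addgroup(): returns the nodes actually added.

        known_nodes plays the role of the revlog index; linkmapper, flag
        processing and the actual storage are elided.
        """
        added = []
        for data in deltas:
            (node, p1, p2, linknode, deltabase, delta, flags, sidedata) = data
            if node in known_nodes:
                # duplicate: e.g. two branches made the same change
                continue
            for p in (p1, p2):
                if p is not None and p not in known_nodes:
                    raise LookupError('unknown parent: %r' % (p,))
            if deltabase not in known_nodes:
                raise LookupError('unknown delta base: %r' % (deltabase,))
            known_nodes.add(node)
            added.append(node)
        return added

    known = {b'root'}
    group = [
        (b'a', b'root', None, b'l0', b'root', b'<delta>', 0, {}),
        (b'a', b'root', None, b'l0', b'root', b'<delta>', 0, {}),  # duplicate
    ]
    assert apply_group(group, known) == [b'a']
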
2805 def iscensored(self, rev):
2807 def iscensored(self, rev):
2806 """Check if a file revision is censored."""
2808 """Check if a file revision is censored."""
2807 if not self._censorable:
2809 if not self._censorable:
2808 return False
2810 return False
2809
2811
2810 return self.flags(rev) & REVIDX_ISCENSORED
2812 return self.flags(rev) & REVIDX_ISCENSORED
2811
2813
2812 def _peek_iscensored(self, baserev, delta):
2814 def _peek_iscensored(self, baserev, delta):
2813 """Quickly check if a delta produces a censored revision."""
2815 """Quickly check if a delta produces a censored revision."""
2814 if not self._censorable:
2816 if not self._censorable:
2815 return False
2817 return False
2816
2818
2817 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2819 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2818
2820
2819 def getstrippoint(self, minlink):
2821 def getstrippoint(self, minlink):
2820 """find the minimum rev that must be stripped to strip the linkrev
2822 """find the minimum rev that must be stripped to strip the linkrev
2821
2823
2822 Returns a tuple containing the minimum rev and a set of all revs that
2824 Returns a tuple containing the minimum rev and a set of all revs that
2823 have linkrevs that will be broken by this strip.
2825 have linkrevs that will be broken by this strip.
2824 """
2826 """
2825 return storageutil.resolvestripinfo(
2827 return storageutil.resolvestripinfo(
2826 minlink,
2828 minlink,
2827 len(self) - 1,
2829 len(self) - 1,
2828 self.headrevs(),
2830 self.headrevs(),
2829 self.linkrev,
2831 self.linkrev,
2830 self.parentrevs,
2832 self.parentrevs,
2831 )
2833 )
2832
2834
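For a linear history the strip point reduces to simple arithmetic over linkrevs. A toy model, not the real storageutil.resolvestripinfo(), illustrating both return values:

    def toy_strippoint(minlink, linkrevs):
        """Toy model of getstrippoint() for a linear history.

        linkrevs[rev] is the linkrev of rev.  Returns (striprev, broken):
        the first rev whose linkrev reaches minlink, and the revs removed
        by the truncation even though their linkrev is below minlink.
        """
        striprev = next(
            (rev for rev, lr in enumerate(linkrevs) if lr >= minlink),
            len(linkrevs),
        )
        broken = {
            rev
            for rev in range(striprev, len(linkrevs))
            if linkrevs[rev] < minlink
        }
        return striprev, broken

    assert toy_strippoint(2, [0, 1, 1, 2, 3]) == (3, set())
    # out-of-order linkrevs: rev 2 still points at changelog rev 1 but
    # must go anyway, because revlogs can only be truncated from the end
    assert toy_strippoint(2, [0, 2, 1, 3]) == (1, {2})
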
2833 def strip(self, minlink, transaction):
2835 def strip(self, minlink, transaction):
2834 """truncate the revlog on the first revision with a linkrev >= minlink
2836 """truncate the revlog on the first revision with a linkrev >= minlink
2835
2837
2836 This function is called when we're stripping revision minlink and
2838 This function is called when we're stripping revision minlink and
2837 its descendants from the repository.
2839 its descendants from the repository.
2838
2840
2839 We have to remove all revisions with linkrev >= minlink, because
2841 We have to remove all revisions with linkrev >= minlink, because
2840 the equivalent changelog revisions will be renumbered after the
2842 the equivalent changelog revisions will be renumbered after the
2841 strip.
2843 strip.
2842
2844
2843 So we truncate the revlog on the first of these revisions, and
2845 So we truncate the revlog on the first of these revisions, and
2844 trust that the caller has saved the revisions that shouldn't be
2846 trust that the caller has saved the revisions that shouldn't be
2845 removed and that it'll re-add them after this truncation.
2847 removed and that it'll re-add them after this truncation.
2846 """
2848 """
2847 if len(self) == 0:
2849 if len(self) == 0:
2848 return
2850 return
2849
2851
2850 rev, _ = self.getstrippoint(minlink)
2852 rev, _ = self.getstrippoint(minlink)
2851 if rev == len(self):
2853 if rev == len(self):
2852 return
2854 return
2853
2855
2854 # first truncate the files on disk
2856 # first truncate the files on disk
2855 data_end = self.start(rev)
2857 data_end = self.start(rev)
2856 if not self._inline:
2858 if not self._inline:
2857 transaction.add(self._datafile, data_end)
2859 transaction.add(self._datafile, data_end)
2858 end = rev * self.index.entry_size
2860 end = rev * self.index.entry_size
2859 else:
2861 else:
2860 end = data_end + (rev * self.index.entry_size)
2862 end = data_end + (rev * self.index.entry_size)
2861
2863
2862 transaction.add(self._indexfile, end)
2864 transaction.add(self._indexfile, end)
2863 if self._docket is not None:
2865 if self._docket is not None:
2864 # XXX we could leverage the docket while stripping. However it is
2866 # XXX we could leverage the docket while stripping. However it is
2865 # not powerful enough at the time of this comment
2867 # not powerful enough at the time of this comment
2866 self._docket.index_end = end
2868 self._docket.index_end = end
2867 self._docket.data_end = data_end
2869 self._docket.data_end = data_end
2868 self._docket.write(transaction, stripping=True)
2870 self._docket.write(transaction, stripping=True)
2869
2871
2870 # then reset internal state in memory to forget those revisions
2872 # then reset internal state in memory to forget those revisions
2871 self._revisioncache = None
2873 self._revisioncache = None
2872 self._chaininfocache = util.lrucachedict(500)
2874 self._chaininfocache = util.lrucachedict(500)
2873 self._chunkclear()
2875 self._chunkclear()
2874
2876
2875 del self.index[rev:-1]
2877 del self.index[rev:-1]
2876
2878
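The two truncation offsets computed above differ between inline and split revlogs. A small sketch making that arithmetic explicit (toy helper, not a revlog method):

    def truncation_offsets(rev, entry_size, data_start, inline):
        """Toy version of the offset arithmetic in strip().

        data_start is self.start(rev), i.e. where rev's data begins.
        Returns (index_end, data_end): byte sizes to truncate the files to.
        """
        if inline:
            # index entries and data share one file, so the index offset
            # includes all surviving data bytes
            return data_start + rev * entry_size, data_start
        return rev * entry_size, data_start

    # split revlog with 64-byte index entries, stripping from rev 3 whose
    # data starts at byte 1000: keep 3 entries and 1000 data bytes
    assert truncation_offsets(3, 64, 1000, inline=False) == (192, 1000)
    assert truncation_offsets(3, 64, 1000, inline=True) == (1192, 1000)
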
2877 def checksize(self):
2879 def checksize(self):
2878 """Check size of index and data files
2880 """Check size of index and data files
2879
2881
2880 return a (dd, di) tuple.
2882 return a (dd, di) tuple.
2881 - dd: extra bytes for the "data" file
2883 - dd: extra bytes for the "data" file
2882 - di: extra bytes for the "index" file
2884 - di: extra bytes for the "index" file
2883
2885
2884 A healthy revlog will return (0, 0).
2886 A healthy revlog will return (0, 0).
2885 """
2887 """
2886 expected = 0
2888 expected = 0
2887 if len(self):
2889 if len(self):
2888 expected = max(0, self.end(len(self) - 1))
2890 expected = max(0, self.end(len(self) - 1))
2889
2891
2890 try:
2892 try:
2891 with self._datafp() as f:
2893 with self._datafp() as f:
2892 f.seek(0, io.SEEK_END)
2894 f.seek(0, io.SEEK_END)
2893 actual = f.tell()
2895 actual = f.tell()
2894 dd = actual - expected
2896 dd = actual - expected
2895 except IOError as inst:
2897 except IOError as inst:
2896 if inst.errno != errno.ENOENT:
2898 if inst.errno != errno.ENOENT:
2897 raise
2899 raise
2898 dd = 0
2900 dd = 0
2899
2901
2900 try:
2902 try:
2901 f = self.opener(self._indexfile)
2903 f = self.opener(self._indexfile)
2902 f.seek(0, io.SEEK_END)
2904 f.seek(0, io.SEEK_END)
2903 actual = f.tell()
2905 actual = f.tell()
2904 f.close()
2906 f.close()
2905 s = self.index.entry_size
2907 s = self.index.entry_size
2906 i = max(0, actual // s)
2908 i = max(0, actual // s)
2907 di = actual - (i * s)
2909 di = actual - (i * s)
2908 if self._inline:
2910 if self._inline:
2909 databytes = 0
2911 databytes = 0
2910 for r in self:
2912 for r in self:
2911 databytes += max(0, self.length(r))
2913 databytes += max(0, self.length(r))
2912 dd = 0
2914 dd = 0
2913 di = actual - len(self) * s - databytes
2915 di = actual - len(self) * s - databytes
2914 except IOError as inst:
2916 except IOError as inst:
2915 if inst.errno != errno.ENOENT:
2917 if inst.errno != errno.ENOENT:
2916 raise
2918 raise
2917 di = 0
2919 di = 0
2918
2920
2919 return (dd, di)
2921 return (dd, di)
2920
2922
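For a split revlog, the index excess ``di`` is just the remainder after dividing the file size into whole entries. A hedged distillation:

    def index_excess(actual_size, entry_size):
        """Toy version of the non-inline `di` computation in checksize():
        bytes in the index file beyond a whole number of entries."""
        entries = max(0, actual_size // entry_size)
        return actual_size - entries * entry_size

    assert index_excess(640, 64) == 0   # healthy: exactly 10 entries
    assert index_excess(650, 64) == 10  # 10 stray trailing bytes
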
2921 def files(self):
2923 def files(self):
2922 res = [self._indexfile]
2924 res = [self._indexfile]
2923 if not self._inline:
2925 if not self._inline:
2924 res.append(self._datafile)
2926 res.append(self._datafile)
2925 return res
2927 return res
2926
2928
2927 def emitrevisions(
2929 def emitrevisions(
2928 self,
2930 self,
2929 nodes,
2931 nodes,
2930 nodesorder=None,
2932 nodesorder=None,
2931 revisiondata=False,
2933 revisiondata=False,
2932 assumehaveparentrevisions=False,
2934 assumehaveparentrevisions=False,
2933 deltamode=repository.CG_DELTAMODE_STD,
2935 deltamode=repository.CG_DELTAMODE_STD,
2934 sidedata_helpers=None,
2936 sidedata_helpers=None,
2935 ):
2937 ):
2936 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2938 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2937 raise error.ProgrammingError(
2939 raise error.ProgrammingError(
2938 b'unhandled value for nodesorder: %s' % nodesorder
2940 b'unhandled value for nodesorder: %s' % nodesorder
2939 )
2941 )
2940
2942
2941 if nodesorder is None and not self._generaldelta:
2943 if nodesorder is None and not self._generaldelta:
2942 nodesorder = b'storage'
2944 nodesorder = b'storage'
2943
2945
2944 if (
2946 if (
2945 not self._storedeltachains
2947 not self._storedeltachains
2946 and deltamode != repository.CG_DELTAMODE_PREV
2948 and deltamode != repository.CG_DELTAMODE_PREV
2947 ):
2949 ):
2948 deltamode = repository.CG_DELTAMODE_FULL
2950 deltamode = repository.CG_DELTAMODE_FULL
2949
2951
2950 return storageutil.emitrevisions(
2952 return storageutil.emitrevisions(
2951 self,
2953 self,
2952 nodes,
2954 nodes,
2953 nodesorder,
2955 nodesorder,
2954 revlogrevisiondelta,
2956 revlogrevisiondelta,
2955 deltaparentfn=self.deltaparent,
2957 deltaparentfn=self.deltaparent,
2956 candeltafn=self.candelta,
2958 candeltafn=self.candelta,
2957 rawsizefn=self.rawsize,
2959 rawsizefn=self.rawsize,
2958 revdifffn=self.revdiff,
2960 revdifffn=self.revdiff,
2959 flagsfn=self.flags,
2961 flagsfn=self.flags,
2960 deltamode=deltamode,
2962 deltamode=deltamode,
2961 revisiondata=revisiondata,
2963 revisiondata=revisiondata,
2962 assumehaveparentrevisions=assumehaveparentrevisions,
2964 assumehaveparentrevisions=assumehaveparentrevisions,
2963 sidedata_helpers=sidedata_helpers,
2965 sidedata_helpers=sidedata_helpers,
2964 )
2966 )
2965
2967
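Before delegating to storageutil.emitrevisions(), the method normalizes its arguments in the two ways shown above. A toy restatement (the CG_DELTAMODE values here are stand-ins, not the real repository constants):

    # toy stand-ins for the repository.CG_DELTAMODE_* constants
    CG_DELTAMODE_STD, CG_DELTAMODE_PREV, CG_DELTAMODE_FULL = b's', b'p', b'f'

    def normalize_emit_args(nodesorder, deltamode, generaldelta,
                            storedeltachains):
        """Toy version of the argument normalization in emitrevisions()."""
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise ValueError(b'unhandled value for nodesorder: %s' % nodesorder)
        if nodesorder is None and not generaldelta:
            # non-generaldelta revlogs delta against storage order
            nodesorder = b'storage'
        if not storedeltachains and deltamode != CG_DELTAMODE_PREV:
            # a revlog that stores no delta chains can only emit full texts
            deltamode = CG_DELTAMODE_FULL
        return nodesorder, deltamode

    assert normalize_emit_args(None, CG_DELTAMODE_STD, False, True) == (
        b'storage',
        CG_DELTAMODE_STD,
    )
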
2966 DELTAREUSEALWAYS = b'always'
2968 DELTAREUSEALWAYS = b'always'
2967 DELTAREUSESAMEREVS = b'samerevs'
2969 DELTAREUSESAMEREVS = b'samerevs'
2968 DELTAREUSENEVER = b'never'
2970 DELTAREUSENEVER = b'never'
2969
2971
2970 DELTAREUSEFULLADD = b'fulladd'
2972 DELTAREUSEFULLADD = b'fulladd'
2971
2973
2972 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2974 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2973
2975
2974 def clone(
2976 def clone(
2975 self,
2977 self,
2976 tr,
2978 tr,
2977 destrevlog,
2979 destrevlog,
2978 addrevisioncb=None,
2980 addrevisioncb=None,
2979 deltareuse=DELTAREUSESAMEREVS,
2981 deltareuse=DELTAREUSESAMEREVS,
2980 forcedeltabothparents=None,
2982 forcedeltabothparents=None,
2981 sidedata_helpers=None,
2983 sidedata_helpers=None,
2982 ):
2984 ):
2983 """Copy this revlog to another, possibly with format changes.
2985 """Copy this revlog to another, possibly with format changes.
2984
2986
2985 The destination revlog will contain the same revisions and nodes.
2987 The destination revlog will contain the same revisions and nodes.
2986 However, it may not be bit-for-bit identical due to e.g. delta encoding
2988 However, it may not be bit-for-bit identical due to e.g. delta encoding
2987 differences.
2989 differences.
2988
2990
2989 The ``deltareuse`` argument controls how deltas from the existing revlog
2991 The ``deltareuse`` argument controls how deltas from the existing revlog
2990 are preserved in the destination revlog. The argument can have the
2992 are preserved in the destination revlog. The argument can have the
2991 following values:
2993 following values:
2992
2994
2993 DELTAREUSEALWAYS
2995 DELTAREUSEALWAYS
2994 Deltas will always be reused (if possible), even if the destination
2996 Deltas will always be reused (if possible), even if the destination
2995 revlog would not select the same revisions for the delta. This is the
2997 revlog would not select the same revisions for the delta. This is the
2996 fastest mode of operation.
2998 fastest mode of operation.
2997 DELTAREUSESAMEREVS
2999 DELTAREUSESAMEREVS
2998 Deltas will be reused if the destination revlog would pick the same
3000 Deltas will be reused if the destination revlog would pick the same
2999 revisions for the delta. This mode strikes a balance between speed
3001 revisions for the delta. This mode strikes a balance between speed
3000 and optimization.
3002 and optimization.
3001 DELTAREUSENEVER
3003 DELTAREUSENEVER
3002 Deltas will never be reused. This is the slowest mode of execution.
3004 Deltas will never be reused. This is the slowest mode of execution.
3003 This mode can be used to recompute deltas (e.g. if the diff/delta
3005 This mode can be used to recompute deltas (e.g. if the diff/delta
3004 algorithm changes).
3006 algorithm changes).
3005 DELTAREUSEFULLADD
3007 DELTAREUSEFULLADD
3006 Revisions will be re-added as if they were new content. This is
3008 Revisions will be re-added as if they were new content. This is
3007 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3009 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3008 e.g. large file detection and handling.
3010 e.g. large file detection and handling.
3009
3011
3010 Delta computation can be slow, so the choice of delta reuse policy can
3012 Delta computation can be slow, so the choice of delta reuse policy can
3011 significantly affect run time.
3013 significantly affect run time.
3012
3014
3013 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3015 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3014 two extremes. Deltas will be reused if they are appropriate. But if the
3016 two extremes. Deltas will be reused if they are appropriate. But if the
3015 delta could choose a better revision, it will do so. This means if you
3017 delta could choose a better revision, it will do so. This means if you
3016 are converting a non-generaldelta revlog to a generaldelta revlog,
3018 are converting a non-generaldelta revlog to a generaldelta revlog,
3017 deltas will be recomputed if the delta's parent isn't a parent of the
3019 deltas will be recomputed if the delta's parent isn't a parent of the
3018 revision.
3020 revision.
3019
3021
3020 In addition to the delta policy, the ``forcedeltabothparents``
3022 In addition to the delta policy, the ``forcedeltabothparents``
3021 argument controls whether to force computing deltas against both parents
3023 argument controls whether to force computing deltas against both parents
3022 for merges. If unset, the destination revlog's current setting is used.
3024 for merges. If unset, the destination revlog's current setting is used.
3023
3025
3024 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3026 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3025 `sidedata_helpers`.
3027 `sidedata_helpers`.
3026 """
3028 """
3027 if deltareuse not in self.DELTAREUSEALL:
3029 if deltareuse not in self.DELTAREUSEALL:
3028 raise ValueError(
3030 raise ValueError(
3029 _(b'value for deltareuse invalid: %s') % deltareuse
3031 _(b'value for deltareuse invalid: %s') % deltareuse
3030 )
3032 )
3031
3033
3032 if len(destrevlog):
3034 if len(destrevlog):
3033 raise ValueError(_(b'destination revlog is not empty'))
3035 raise ValueError(_(b'destination revlog is not empty'))
3034
3036
3035 if getattr(self, 'filteredrevs', None):
3037 if getattr(self, 'filteredrevs', None):
3036 raise ValueError(_(b'source revlog has filtered revisions'))
3038 raise ValueError(_(b'source revlog has filtered revisions'))
3037 if getattr(destrevlog, 'filteredrevs', None):
3039 if getattr(destrevlog, 'filteredrevs', None):
3038 raise ValueError(_(b'destination revlog has filtered revisions'))
3040 raise ValueError(_(b'destination revlog has filtered revisions'))
3039
3041
3040 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3042 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3041 # if possible.
3043 # if possible.
3042 oldlazydelta = destrevlog._lazydelta
3044 oldlazydelta = destrevlog._lazydelta
3043 oldlazydeltabase = destrevlog._lazydeltabase
3045 oldlazydeltabase = destrevlog._lazydeltabase
3044 oldamd = destrevlog._deltabothparents
3046 oldamd = destrevlog._deltabothparents
3045
3047
3046 try:
3048 try:
3047 if deltareuse == self.DELTAREUSEALWAYS:
3049 if deltareuse == self.DELTAREUSEALWAYS:
3048 destrevlog._lazydeltabase = True
3050 destrevlog._lazydeltabase = True
3049 destrevlog._lazydelta = True
3051 destrevlog._lazydelta = True
3050 elif deltareuse == self.DELTAREUSESAMEREVS:
3052 elif deltareuse == self.DELTAREUSESAMEREVS:
3051 destrevlog._lazydeltabase = False
3053 destrevlog._lazydeltabase = False
3052 destrevlog._lazydelta = True
3054 destrevlog._lazydelta = True
3053 elif deltareuse == self.DELTAREUSENEVER:
3055 elif deltareuse == self.DELTAREUSENEVER:
3054 destrevlog._lazydeltabase = False
3056 destrevlog._lazydeltabase = False
3055 destrevlog._lazydelta = False
3057 destrevlog._lazydelta = False
3056
3058
3057 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3059 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3058
3060
3059 self._clone(
3061 self._clone(
3060 tr,
3062 tr,
3061 destrevlog,
3063 destrevlog,
3062 addrevisioncb,
3064 addrevisioncb,
3063 deltareuse,
3065 deltareuse,
3064 forcedeltabothparents,
3066 forcedeltabothparents,
3065 sidedata_helpers,
3067 sidedata_helpers,
3066 )
3068 )
3067
3069
3068 finally:
3070 finally:
3069 destrevlog._lazydelta = oldlazydelta
3071 destrevlog._lazydelta = oldlazydelta
3070 destrevlog._lazydeltabase = oldlazydeltabase
3072 destrevlog._lazydeltabase = oldlazydeltabase
3071 destrevlog._deltabothparents = oldamd
3073 destrevlog._deltabothparents = oldamd
3072
3074
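The three reuse policies other than DELTAREUSEFULLADD reduce to the two lazy-delta booleans set in the try block above. A hedged summary as a table-in-code:

    # (lazydelta, lazydeltabase) applied to the destination revlog for
    # each policy; DELTAREUSEFULLADD bypasses these flags entirely and
    # re-adds full revisions instead.
    DELTAREUSE_FLAGS = {
        b'always': (True, True),     # reuse delta and its base blindly
        b'samerevs': (True, False),  # reuse only against the same base
        b'never': (False, False),    # always recompute
    }

    assert DELTAREUSE_FLAGS[b'samerevs'] == (True, False)
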
3073 def _clone(
3075 def _clone(
3074 self,
3076 self,
3075 tr,
3077 tr,
3076 destrevlog,
3078 destrevlog,
3077 addrevisioncb,
3079 addrevisioncb,
3078 deltareuse,
3080 deltareuse,
3079 forcedeltabothparents,
3081 forcedeltabothparents,
3080 sidedata_helpers,
3082 sidedata_helpers,
3081 ):
3083 ):
3082 """perform the core duty of `revlog.clone` after parameter processing"""
3084 """perform the core duty of `revlog.clone` after parameter processing"""
3083 deltacomputer = deltautil.deltacomputer(destrevlog)
3085 deltacomputer = deltautil.deltacomputer(destrevlog)
3084 index = self.index
3086 index = self.index
3085 for rev in self:
3087 for rev in self:
3086 entry = index[rev]
3088 entry = index[rev]
3087
3089
3088 # Some classes override linkrev to take filtered revs into
3090 # Some classes override linkrev to take filtered revs into
3089 # account. Use raw entry from index.
3091 # account. Use raw entry from index.
3090 flags = entry[0] & 0xFFFF
3092 flags = entry[0] & 0xFFFF
3091 linkrev = entry[4]
3093 linkrev = entry[4]
3092 p1 = index[entry[5]][7]
3094 p1 = index[entry[5]][7]
3093 p2 = index[entry[6]][7]
3095 p2 = index[entry[6]][7]
3094 node = entry[7]
3096 node = entry[7]
3095
3097
3096 # (Possibly) reuse the delta from the revlog if allowed and
3098 # (Possibly) reuse the delta from the revlog if allowed and
3097 # the revlog chunk is a delta.
3099 # the revlog chunk is a delta.
3098 cachedelta = None
3100 cachedelta = None
3099 rawtext = None
3101 rawtext = None
3100 if deltareuse == self.DELTAREUSEFULLADD:
3102 if deltareuse == self.DELTAREUSEFULLADD:
3101 text, sidedata = self._revisiondata(rev)
3103 text, sidedata = self._revisiondata(rev)
3102
3104
3103 if sidedata_helpers is not None:
3105 if sidedata_helpers is not None:
3104 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3106 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3105 self, sidedata_helpers, sidedata, rev
3107 self, sidedata_helpers, sidedata, rev
3106 )
3108 )
3107 flags = flags | new_flags[0] & ~new_flags[1]
3109 flags = flags | new_flags[0] & ~new_flags[1]
3108
3110
3109 destrevlog.addrevision(
3111 destrevlog.addrevision(
3110 text,
3112 text,
3111 tr,
3113 tr,
3112 linkrev,
3114 linkrev,
3113 p1,
3115 p1,
3114 p2,
3116 p2,
3115 cachedelta=cachedelta,
3117 cachedelta=cachedelta,
3116 node=node,
3118 node=node,
3117 flags=flags,
3119 flags=flags,
3118 deltacomputer=deltacomputer,
3120 deltacomputer=deltacomputer,
3119 sidedata=sidedata,
3121 sidedata=sidedata,
3120 )
3122 )
3121 else:
3123 else:
3122 if destrevlog._lazydelta:
3124 if destrevlog._lazydelta:
3123 dp = self.deltaparent(rev)
3125 dp = self.deltaparent(rev)
3124 if dp != nullrev:
3126 if dp != nullrev:
3125 cachedelta = (dp, bytes(self._chunk(rev)))
3127 cachedelta = (dp, bytes(self._chunk(rev)))
3126
3128
3127 sidedata = None
3129 sidedata = None
3128 if not cachedelta:
3130 if not cachedelta:
3129 rawtext, sidedata = self._revisiondata(rev)
3131 rawtext, sidedata = self._revisiondata(rev)
3130 if sidedata is None:
3132 if sidedata is None:
3131 sidedata = self.sidedata(rev)
3133 sidedata = self.sidedata(rev)
3132
3134
3133 if sidedata_helpers is not None:
3135 if sidedata_helpers is not None:
3134 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3136 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3135 self, sidedata_helpers, sidedata, rev
3137 self, sidedata_helpers, sidedata, rev
3136 )
3138 )
3137 flags = flags | new_flags[0] & ~new_flags[1]
3139 flags = flags | new_flags[0] & ~new_flags[1]
3138
3140
3139 with destrevlog._writing(tr):
3141 with destrevlog._writing(tr):
3140 destrevlog._addrevision(
3142 destrevlog._addrevision(
3141 node,
3143 node,
3142 rawtext,
3144 rawtext,
3143 tr,
3145 tr,
3144 linkrev,
3146 linkrev,
3145 p1,
3147 p1,
3146 p2,
3148 p2,
3147 flags,
3149 flags,
3148 cachedelta,
3150 cachedelta,
3149 deltacomputer=deltacomputer,
3151 deltacomputer=deltacomputer,
3150 sidedata=sidedata,
3152 sidedata=sidedata,
3151 )
3153 )
3152
3154
3153 if addrevisioncb:
3155 if addrevisioncb:
3154 addrevisioncb(self, rev, node)
3156 addrevisioncb(self, rev, node)
3155
3157
3156 def censorrevision(self, tr, censornode, tombstone=b''):
3158 def censorrevision(self, tr, censornode, tombstone=b''):
3157 if self._format_version == REVLOGV0:
3159 if self._format_version == REVLOGV0:
3158 raise error.RevlogError(
3160 raise error.RevlogError(
3159 _(b'cannot censor with version %d revlogs')
3161 _(b'cannot censor with version %d revlogs')
3160 % self._format_version
3162 % self._format_version
3161 )
3163 )
3162
3164
3163 censorrev = self.rev(censornode)
3165 censorrev = self.rev(censornode)
3164 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3166 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3165
3167
3166 if len(tombstone) > self.rawsize(censorrev):
3168 if len(tombstone) > self.rawsize(censorrev):
3167 raise error.Abort(
3169 raise error.Abort(
3168 _(b'censor tombstone must be no longer than censored data')
3170 _(b'censor tombstone must be no longer than censored data')
3169 )
3171 )
3170
3172
3171 # Rewriting the revlog in place is hard. Our strategy for censoring is
3173 # Rewriting the revlog in place is hard. Our strategy for censoring is
3172 # to create a new revlog, copy all revisions to it, then replace the
3174 # to create a new revlog, copy all revisions to it, then replace the
3173 # revlogs on transaction close.
3175 # revlogs on transaction close.
3174 #
3176 #
3175 # This is a bit dangerous. We could easily have a mismatch of state.
3177 # This is a bit dangerous. We could easily have a mismatch of state.
3176 newrl = revlog(
3178 newrl = revlog(
3177 self.opener,
3179 self.opener,
3178 target=self.target,
3180 target=self.target,
3179 radix=self.radix,
3181 radix=self.radix,
3180 postfix=b'tmpcensored',
3182 postfix=b'tmpcensored',
3181 censorable=True,
3183 censorable=True,
3182 )
3184 )
3183 newrl._format_version = self._format_version
3185 newrl._format_version = self._format_version
3184 newrl._format_flags = self._format_flags
3186 newrl._format_flags = self._format_flags
3185 newrl._generaldelta = self._generaldelta
3187 newrl._generaldelta = self._generaldelta
3186 newrl._parse_index = self._parse_index
3188 newrl._parse_index = self._parse_index
3187
3189
3188 for rev in self.revs():
3190 for rev in self.revs():
3189 node = self.node(rev)
3191 node = self.node(rev)
3190 p1, p2 = self.parents(node)
3192 p1, p2 = self.parents(node)
3191
3193
3192 if rev == censorrev:
3194 if rev == censorrev:
3193 newrl.addrawrevision(
3195 newrl.addrawrevision(
3194 tombstone,
3196 tombstone,
3195 tr,
3197 tr,
3196 self.linkrev(censorrev),
3198 self.linkrev(censorrev),
3197 p1,
3199 p1,
3198 p2,
3200 p2,
3199 censornode,
3201 censornode,
3200 REVIDX_ISCENSORED,
3202 REVIDX_ISCENSORED,
3201 )
3203 )
3202
3204
3203 if newrl.deltaparent(rev) != nullrev:
3205 if newrl.deltaparent(rev) != nullrev:
3204 raise error.Abort(
3206 raise error.Abort(
3205 _(
3207 _(
3206 b'censored revision stored as delta; '
3208 b'censored revision stored as delta; '
3207 b'cannot censor'
3209 b'cannot censor'
3208 ),
3210 ),
3209 hint=_(
3211 hint=_(
3210 b'censoring of revlogs is not '
3212 b'censoring of revlogs is not '
3211 b'fully implemented; please report '
3213 b'fully implemented; please report '
3212 b'this bug'
3214 b'this bug'
3213 ),
3215 ),
3214 )
3216 )
3215 continue
3217 continue
3216
3218
3217 if self.iscensored(rev):
3219 if self.iscensored(rev):
3218 if self.deltaparent(rev) != nullrev:
3220 if self.deltaparent(rev) != nullrev:
3219 raise error.Abort(
3221 raise error.Abort(
3220 _(
3222 _(
3221 b'cannot censor due to censored '
3223 b'cannot censor due to censored '
3222 b'revision having delta stored'
3224 b'revision having delta stored'
3223 )
3225 )
3224 )
3226 )
3225 rawtext = self._chunk(rev)
3227 rawtext = self._chunk(rev)
3226 else:
3228 else:
3227 rawtext = self.rawdata(rev)
3229 rawtext = self.rawdata(rev)
3228
3230
3229 newrl.addrawrevision(
3231 newrl.addrawrevision(
3230 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3232 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3231 )
3233 )
3232
3234
3233 tr.addbackup(self._indexfile, location=b'store')
3235 tr.addbackup(self._indexfile, location=b'store')
3234 if not self._inline:
3236 if not self._inline:
3235 tr.addbackup(self._datafile, location=b'store')
3237 tr.addbackup(self._datafile, location=b'store')
3236
3238
3237 self.opener.rename(newrl._indexfile, self._indexfile)
3239 self.opener.rename(newrl._indexfile, self._indexfile)
3238 if not self._inline:
3240 if not self._inline:
3239 self.opener.rename(newrl._datafile, self._datafile)
3241 self.opener.rename(newrl._datafile, self._datafile)
3240
3242
3241 self.clearcaches()
3243 self.clearcaches()
3242 self._loadindex()
3244 self._loadindex()
3243
3245
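Censoring can only reuse the existing index entry if the tombstone fits in the recorded raw size, hence the length check above. A toy model, with a hypothetical stand-in for storageutil.packmeta():

    def make_tombstone(rawsize, message):
        """Toy model of the size constraint in censorrevision().

        pack_censor_meta is a stand-in for storageutil.packmeta(): it
        wraps the message in the b'\\x01\\n' metadata envelope used by
        filelogs.
        """

        def pack_censor_meta(msg):
            return b'\x01\ncensored: %s\n\x01\n' % msg

        tombstone = pack_censor_meta(message)
        if len(tombstone) > rawsize:
            # the tombstone overwrites data whose length is already
            # recorded in the index, so it may not grow the revision
            raise ValueError('censor tombstone must be no longer than '
                             'censored data')
        return tombstone

    assert len(make_tombstone(100, b'removed')) <= 100
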
3244 def verifyintegrity(self, state):
3246 def verifyintegrity(self, state):
3245 """Verifies the integrity of the revlog.
3247 """Verifies the integrity of the revlog.
3246
3248
3247 Yields ``revlogproblem`` instances describing problems that are
3249 Yields ``revlogproblem`` instances describing problems that are
3248 found.
3250 found.
3249 """
3251 """
3250 dd, di = self.checksize()
3252 dd, di = self.checksize()
3251 if dd:
3253 if dd:
3252 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3254 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3253 if di:
3255 if di:
3254 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3256 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3255
3257
3256 version = self._format_version
3258 version = self._format_version
3257
3259
3258 # The verifier tells us what version revlog we should be.
3260 # The verifier tells us what version revlog we should be.
3259 if version != state[b'expectedversion']:
3261 if version != state[b'expectedversion']:
3260 yield revlogproblem(
3262 yield revlogproblem(
3261 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3263 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3262 % (self.display_id, version, state[b'expectedversion'])
3264 % (self.display_id, version, state[b'expectedversion'])
3263 )
3265 )
3264
3266
3265 state[b'skipread'] = set()
3267 state[b'skipread'] = set()
3266 state[b'safe_renamed'] = set()
3268 state[b'safe_renamed'] = set()
3267
3269
3268 for rev in self:
3270 for rev in self:
3269 node = self.node(rev)
3271 node = self.node(rev)
3270
3272
3271 # Verify contents. 4 cases to care about:
3273 # Verify contents. 4 cases to care about:
3272 #
3274 #
3273 # common: the most common case
3275 # common: the most common case
3274 # rename: with a rename
3276 # rename: with a rename
3275 # meta: file content starts with b'\1\n', the metadata
3277 # meta: file content starts with b'\1\n', the metadata
3276 # header defined in filelog.py, but without a rename
3278 # header defined in filelog.py, but without a rename
3277 # ext: content stored externally
3279 # ext: content stored externally
3278 #
3280 #
3279 # More formally, their differences are shown below:
3281 # More formally, their differences are shown below:
3280 #
3282 #
3281 # | common | rename | meta | ext
3283 # | common | rename | meta | ext
3282 # -------------------------------------------------------
3284 # -------------------------------------------------------
3283 # flags() | 0 | 0 | 0 | not 0
3285 # flags() | 0 | 0 | 0 | not 0
3284 # renamed() | False | True | False | ?
3286 # renamed() | False | True | False | ?
3285 # rawtext[0:2]=='\1\n'| False | True | True | ?
3287 # rawtext[0:2]=='\1\n'| False | True | True | ?
3286 #
3288 #
3287 # "rawtext" means the raw text stored in revlog data, which
3289 # "rawtext" means the raw text stored in revlog data, which
3288 # could be retrieved by "rawdata(rev)". "text"
3290 # could be retrieved by "rawdata(rev)". "text"
3289 # mentioned below is "revision(rev)".
3291 # mentioned below is "revision(rev)".
3290 #
3292 #
3291 # There are 3 different lengths stored physically:
3293 # There are 3 different lengths stored physically:
3292 # 1. L1: rawsize, stored in revlog index
3294 # 1. L1: rawsize, stored in revlog index
3293 # 2. L2: len(rawtext), stored in revlog data
3295 # 2. L2: len(rawtext), stored in revlog data
3294 # 3. L3: len(text), stored in revlog data if flags==0, or
3296 # 3. L3: len(text), stored in revlog data if flags==0, or
3295 # possibly somewhere else if flags!=0
3297 # possibly somewhere else if flags!=0
3296 #
3298 #
3297 # L1 should be equal to L2. L3 could be different from them.
3299 # L1 should be equal to L2. L3 could be different from them.
3298 # "text" may or may not affect commit hash depending on flag
3300 # "text" may or may not affect commit hash depending on flag
3299 # processors (see flagutil.addflagprocessor).
3301 # processors (see flagutil.addflagprocessor).
3300 #
3302 #
3301 # | common | rename | meta | ext
3303 # | common | rename | meta | ext
3302 # -------------------------------------------------
3304 # -------------------------------------------------
3303 # rawsize() | L1 | L1 | L1 | L1
3305 # rawsize() | L1 | L1 | L1 | L1
3304 # size() | L1 | L2-LM | L1(*) | L1 (?)
3306 # size() | L1 | L2-LM | L1(*) | L1 (?)
3305 # len(rawtext) | L2 | L2 | L2 | L2
3307 # len(rawtext) | L2 | L2 | L2 | L2
3306 # len(text) | L2 | L2 | L2 | L3
3308 # len(text) | L2 | L2 | L2 | L3
3307 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3309 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3308 #
3310 #
3309 # LM: length of metadata, depending on rawtext
3311 # LM: length of metadata, depending on rawtext
3310 # (*): not ideal, see comment in filelog.size
3312 # (*): not ideal, see comment in filelog.size
3311 # (?): could be "- len(meta)" if the resolved content has
3313 # (?): could be "- len(meta)" if the resolved content has
3312 # rename metadata
3314 # rename metadata
3313 #
3315 #
3314 # Checks needed to be done:
3316 # Checks needed to be done:
3315 # 1. length check: L1 == L2, in all cases.
3317 # 1. length check: L1 == L2, in all cases.
3316 # 2. hash check: depending on flag processor, we may need to
3318 # 2. hash check: depending on flag processor, we may need to
3317 # use either "text" (external), or "rawtext" (in revlog).
3319 # use either "text" (external), or "rawtext" (in revlog).
3318
3320
3319 try:
3321 try:
3320 skipflags = state.get(b'skipflags', 0)
3322 skipflags = state.get(b'skipflags', 0)
3321 if skipflags:
3323 if skipflags:
3322 skipflags &= self.flags(rev)
3324 skipflags &= self.flags(rev)
3323
3325
3324 _verify_revision(self, skipflags, state, node)
3326 _verify_revision(self, skipflags, state, node)
3325
3327
3326 l1 = self.rawsize(rev)
3328 l1 = self.rawsize(rev)
3327 l2 = len(self.rawdata(node))
3329 l2 = len(self.rawdata(node))
3328
3330
3329 if l1 != l2:
3331 if l1 != l2:
3330 yield revlogproblem(
3332 yield revlogproblem(
3331 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3333 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3332 node=node,
3334 node=node,
3333 )
3335 )
3334
3336
3335 except error.CensoredNodeError:
3337 except error.CensoredNodeError:
3336 if state[b'erroroncensored']:
3338 if state[b'erroroncensored']:
3337 yield revlogproblem(
3339 yield revlogproblem(
3338 error=_(b'censored file data'), node=node
3340 error=_(b'censored file data'), node=node
3339 )
3341 )
3340 state[b'skipread'].add(node)
3342 state[b'skipread'].add(node)
3341 except Exception as e:
3343 except Exception as e:
3342 yield revlogproblem(
3344 yield revlogproblem(
3343 error=_(b'unpacking %s: %s')
3345 error=_(b'unpacking %s: %s')
3344 % (short(node), stringutil.forcebytestr(e)),
3346 % (short(node), stringutil.forcebytestr(e)),
3345 node=node,
3347 node=node,
3346 )
3348 )
3347 state[b'skipread'].add(node)
3349 state[b'skipread'].add(node)
3348
3350
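Check 1 from the long comment above (L1 == L2) is easy to state in isolation. A hedged sketch of just that length invariant:

    def length_problem(rawsize_from_index, rawtext):
        """Toy form of verifyintegrity()'s check 1: the rawsize recorded
        in the index (L1) must equal the stored rawtext length (L2)."""
        l1, l2 = rawsize_from_index, len(rawtext)
        if l1 != l2:
            return 'unpacked size is %d, %d expected' % (l2, l1)
        return None

    assert length_problem(5, b'hello') is None
    assert length_problem(4, b'hello') == 'unpacked size is 5, 4 expected'
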
3349 def storageinfo(
3351 def storageinfo(
3350 self,
3352 self,
3351 exclusivefiles=False,
3353 exclusivefiles=False,
3352 sharedfiles=False,
3354 sharedfiles=False,
3353 revisionscount=False,
3355 revisionscount=False,
3354 trackedsize=False,
3356 trackedsize=False,
3355 storedsize=False,
3357 storedsize=False,
3356 ):
3358 ):
3357 d = {}
3359 d = {}
3358
3360
3359 if exclusivefiles:
3361 if exclusivefiles:
3360 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3362 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3361 if not self._inline:
3363 if not self._inline:
3362 d[b'exclusivefiles'].append((self.opener, self._datafile))
3364 d[b'exclusivefiles'].append((self.opener, self._datafile))
3363
3365
3364 if sharedfiles:
3366 if sharedfiles:
3365 d[b'sharedfiles'] = []
3367 d[b'sharedfiles'] = []
3366
3368
3367 if revisionscount:
3369 if revisionscount:
3368 d[b'revisionscount'] = len(self)
3370 d[b'revisionscount'] = len(self)
3369
3371
3370 if trackedsize:
3372 if trackedsize:
3371 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3373 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3372
3374
3373 if storedsize:
3375 if storedsize:
3374 d[b'storedsize'] = sum(
3376 d[b'storedsize'] = sum(
3375 self.opener.stat(path).st_size for path in self.files()
3377 self.opener.stat(path).st_size for path in self.files()
3376 )
3378 )
3377
3379
3378 return d
3380 return d
3379
3381
3380 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3382 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3381 if not self.hassidedata:
3383 if not self.hassidedata:
3382 return
3384 return
3383 # revlog formats with sidedata support do not support inline
3385 # revlog formats with sidedata support do not support inline
3384 assert not self._inline
3386 assert not self._inline
3385 if not helpers[1] and not helpers[2]:
3387 if not helpers[1] and not helpers[2]:
3386 # Nothing to generate or remove
3388 # Nothing to generate or remove
3387 return
3389 return
3388
3390
3389 new_entries = []
3391 new_entries = []
3390 # append the new sidedata
3392 # append the new sidedata
3391 with self._writing(transaction):
3393 with self._writing(transaction):
3392 ifh, dfh = self._writinghandles
3394 ifh, dfh = self._writinghandles
3393 if self._docket is not None:
3395 if self._docket is not None:
3394 dfh.seek(self._docket.data_end, os.SEEK_SET)
3396 dfh.seek(self._docket.data_end, os.SEEK_SET)
3395 else:
3397 else:
3396 dfh.seek(0, os.SEEK_END)
3398 dfh.seek(0, os.SEEK_END)
3397
3399
3398 current_offset = dfh.tell()
3400 current_offset = dfh.tell()
3399 for rev in range(startrev, endrev + 1):
3401 for rev in range(startrev, endrev + 1):
3400 entry = self.index[rev]
3402 entry = self.index[rev]
3401 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3403 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3402 store=self,
3404 store=self,
3403 sidedata_helpers=helpers,
3405 sidedata_helpers=helpers,
3404 sidedata={},
3406 sidedata={},
3405 rev=rev,
3407 rev=rev,
3406 )
3408 )
3407
3409
3408 serialized_sidedata = sidedatautil.serialize_sidedata(
3410 serialized_sidedata = sidedatautil.serialize_sidedata(
3409 new_sidedata
3411 new_sidedata
3410 )
3412 )
3411
3413
3412 sidedata_compression_mode = COMP_MODE_INLINE
3414 sidedata_compression_mode = COMP_MODE_INLINE
3413 if serialized_sidedata and self.hassidedata:
3415 if serialized_sidedata and self.hassidedata:
3414 sidedata_compression_mode = COMP_MODE_PLAIN
3416 sidedata_compression_mode = COMP_MODE_PLAIN
3415 h, comp_sidedata = self.compress(serialized_sidedata)
3417 h, comp_sidedata = self.compress(serialized_sidedata)
3416 if (
3418 if (
3417 h != b'u'
3419 h != b'u'
3418 and comp_sidedata[0] != b'\0'
3420 and comp_sidedata[0] != b'\0'
3419 and len(comp_sidedata) < len(serialized_sidedata)
3421 and len(comp_sidedata) < len(serialized_sidedata)
3420 ):
3422 ):
3421 assert not h
3423 assert not h
3422 if (
3424 if (
3423 comp_sidedata[0]
3425 comp_sidedata[0]
3424 == self._docket.default_compression_header
3426 == self._docket.default_compression_header
3425 ):
3427 ):
3426 sidedata_compression_mode = COMP_MODE_DEFAULT
3428 sidedata_compression_mode = COMP_MODE_DEFAULT
3427 serialized_sidedata = comp_sidedata
3429 serialized_sidedata = comp_sidedata
3428 else:
3430 else:
3429 sidedata_compression_mode = COMP_MODE_INLINE
3431 sidedata_compression_mode = COMP_MODE_INLINE
3430 serialized_sidedata = comp_sidedata
3432 serialized_sidedata = comp_sidedata
3431 if entry[8] != 0 or entry[9] != 0:
3433 if entry[8] != 0 or entry[9] != 0:
3432 # rewriting entries that already have sidedata is not
3434 # rewriting entries that already have sidedata is not
3433 # supported yet, because it introduces garbage data in the
3435 # supported yet, because it introduces garbage data in the
3434 # revlog.
3436 # revlog.
3435 msg = b"rewriting existing sidedata is not supported yet"
3437 msg = b"rewriting existing sidedata is not supported yet"
3436 raise error.Abort(msg)
3438 raise error.Abort(msg)
3437
3439
3438 # Apply (potential) flags to add and to remove after running
3440 # Apply (potential) flags to add and to remove after running
3439 # the sidedata helpers
3441 # the sidedata helpers
3440 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3442 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3441 entry_update = (
3443 entry_update = (
3442 current_offset,
3444 current_offset,
3443 len(serialized_sidedata),
3445 len(serialized_sidedata),
3444 new_offset_flags,
3446 new_offset_flags,
3445 sidedata_compression_mode,
3447 sidedata_compression_mode,
3446 )
3448 )
3447
3449
3448 # the sidedata computation might have moved the file cursors around
3450 # the sidedata computation might have moved the file cursors around
3449 dfh.seek(current_offset, os.SEEK_SET)
3451 dfh.seek(current_offset, os.SEEK_SET)
3450 dfh.write(serialized_sidedata)
3452 dfh.write(serialized_sidedata)
3451 new_entries.append(entry_update)
3453 new_entries.append(entry_update)
3452 current_offset += len(serialized_sidedata)
3454 current_offset += len(serialized_sidedata)
3453 if self._docket is not None:
3455 if self._docket is not None:
3454 self._docket.data_end = dfh.tell()
3456 self._docket.data_end = dfh.tell()
3455
3457
3456 # rewrite the new index entries
3458 # rewrite the new index entries
3457 ifh.seek(startrev * self.index.entry_size)
3459 ifh.seek(startrev * self.index.entry_size)
3458 for i, e in enumerate(new_entries):
3460 for i, e in enumerate(new_entries):
3459 rev = startrev + i
3461 rev = startrev + i
3460 self.index.replace_sidedata_info(rev, *e)
3462 self.index.replace_sidedata_info(rev, *e)
3461 packed = self.index.entry_binary(rev)
3463 packed = self.index.entry_binary(rev)
3462 if rev == 0 and self._docket is None:
3464 if rev == 0 and self._docket is None:
3463 header = self._format_flags | self._format_version
3465 header = self._format_flags | self._format_version
3464 header = self.index.pack_header(header)
3466 header = self.index.pack_header(header)
3465 packed = header + packed
3467 packed = header + packed
3466 ifh.write(packed)
3468 ifh.write(packed)
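
The compression-mode selection in rewrite_sidedata() stores sidedata plain unless compressing both shrinks it and is unambiguous. A hedged distillation using zlib, whose 'x' header byte conveniently matches Mercurial's zlib revlog header:

    import zlib

    # toy stand-ins for the revlog COMP_MODE_* constants
    COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

    def pick_sidedata_mode(serialized, compress, default_header):
        """Toy version of the compression-mode choice above.

        compress stands in for revlog.compress() and returns (header,
        payload); a b'u' header means "left uncompressed".
        """
        h, comp = compress(serialized)
        if h != b'u' and comp[:1] != b'\0' and len(comp) < len(serialized):
            if comp[:1] == default_header:
                # the docket default already names this engine: store bare
                return COMP_MODE_DEFAULT, comp
            # otherwise keep the engine header inline with the payload
            return COMP_MODE_INLINE, comp
        # compression did not help: store the serialized sidedata as-is
        return COMP_MODE_PLAIN, serialized

    mode, payload = pick_sidedata_mode(
        b'x' * 64, lambda d: (b'', zlib.compress(d)), b'x'
    )
    assert mode == COMP_MODE_DEFAULT and len(payload) < 64
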
@@ -1,249 +1,265 b''
1 # docket - code related to revlog "docket"
1 # docket - code related to revlog "docket"
2 #
2 #
3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 ### Revlog docket file
8 ### Revlog docket file
9 #
9 #
10 # The revlog is stored on disk using multiple files:
10 # The revlog is stored on disk using multiple files:
11 #
11 #
12 # * a small docket file, containing metadata and a pointer,
12 # * a small docket file, containing metadata and a pointer,
13 #
13 #
14 # * an index file, containing fixed width information about revisions,
14 # * an index file, containing fixed width information about revisions,
15 #
15 #
16 # * a data file, containing variable width data for these revisions,
16 # * a data file, containing variable width data for these revisions,
17
17
18 from __future__ import absolute_import
18 from __future__ import absolute_import
19
19
20 import errno
20 import errno
21 import os
21 import os
22 import random
22 import random
23 import struct
23 import struct
24
24
25 from .. import (
25 from .. import (
26 encoding,
26 encoding,
27 error,
27 error,
28 node,
28 node,
29 pycompat,
29 pycompat,
30 util,
30 util,
31 )
31 )
32
32
33 from . import (
33 from . import (
34 constants,
34 constants,
35 )
35 )
36
36
37
37
38 def make_uid(id_size=8):
38 def make_uid(id_size=8):
39 """return a new unique identifier.
39 """return a new unique identifier.
40
40
41 The identifier is random and composed of ascii characters."""
41 The identifier is random and composed of ascii characters."""
42 # since we "hex" the result we need half the number of bytes to have a final
42 # since we "hex" the result we need half the number of bytes to have a final
43 # uuid of size ID_SIZE
43 # uuid of size ID_SIZE
44 return node.hex(os.urandom(id_size // 2))
44 return node.hex(os.urandom(id_size // 2))
45
45
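Because the identifier is hex-encoded, half as many random bytes give exactly ``id_size`` ASCII characters. A one-line standard-library restatement:

    import binascii
    import os

    def make_uid_sketch(id_size=8):
        # half as many random bytes, doubled back to id_size by hex-encoding
        return binascii.hexlify(os.urandom(id_size // 2))

    assert len(make_uid_sketch(8)) == 8
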
46
46
47 # some special test logic to avoid annoying random output in the tests
47 # some special test logic to avoid annoying random output in the tests
48 stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')
48 stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')
49
49
50 if stable_docket_file:
50 if stable_docket_file:
51
51
52 def make_uid(id_size=8):
52 def make_uid(id_size=8):
53 try:
53 try:
54 with open(stable_docket_file, mode='rb') as f:
54 with open(stable_docket_file, mode='rb') as f:
55 seed = f.read().strip()
55 seed = f.read().strip()
56 except IOError as inst:
56 except IOError as inst:
57 if inst.errno != errno.ENOENT:
57 if inst.errno != errno.ENOENT:
58 raise
58 raise
59 seed = b'04' # chosen by a fair dice roll. guaranteed to be random
59 seed = b'04' # chosen by a fair dice roll. guaranteed to be random
60 if pycompat.ispy3:
60 if pycompat.ispy3:
61 iter_seed = iter(seed)
61 iter_seed = iter(seed)
62 else:
62 else:
63 iter_seed = (ord(c) for c in seed)
63 iter_seed = (ord(c) for c in seed)
64 # some basic circular sum hashing on 64 bits
64 # some basic circular sum hashing on 64 bits
65 int_seed = 0
65 int_seed = 0
66 low_mask = int('1' * 35, 2)
66 low_mask = int('1' * 35, 2)
67 for i in iter_seed:
67 for i in iter_seed:
68 high_part = int_seed >> 35
68 high_part = int_seed >> 35
69 low_part = (int_seed & low_mask) << 28
69 low_part = (int_seed & low_mask) << 28
70 int_seed = high_part + low_part + i
70 int_seed = high_part + low_part + i
71 r = random.Random()
71 r = random.Random()
72 if pycompat.ispy3:
72 if pycompat.ispy3:
73 r.seed(int_seed, version=1)
73 r.seed(int_seed, version=1)
74 else:
74 else:
75 r.seed(int_seed)
75 r.seed(int_seed)
76 # once we drop python 3.8 support we can simply use r.randbytes
76 # once we drop python 3.8 support we can simply use r.randbytes
77 raw = r.getrandbits(id_size * 4)
77 raw = r.getrandbits(id_size * 4)
78 assert id_size == 8
78 assert id_size == 8
79 p = struct.pack('>L', raw)
79 p = struct.pack('>L', raw)
80 new = node.hex(p)
80 new = node.hex(p)
81 with open(stable_docket_file, 'wb') as f:
81 with open(stable_docket_file, 'wb') as f:
82 f.write(new)
82 f.write(new)
83 return new
83 return new
84
84
85
85
86 # Docket format
86 # Docket format
87 #
87 #
88 # * 4 bytes: revlog version
88 # * 4 bytes: revlog version
89 # | This is mandatory as docket must be compatible with the previous
89 # | This is mandatory as docket must be compatible with the previous
90 # | revlog index header.
90 # | revlog index header.
91 # * 1 byte: size of index uuid
91 # * 1 byte: size of index uuid
92 # * 1 byte: size of data uuid
92 # * 8 bytes: size of index-data
93 # * 8 bytes: size of index-data
93 # * 8 bytes: pending size of index-data
94 # * 8 bytes: pending size of index-data
94 # * 8 bytes: size of data
95 # * 8 bytes: size of data
95 # * 8 bytes: pending size of data
96 # * 8 bytes: pending size of data
96 # * 1 byte: default compression header
97 # * 1 byte: default compression header
97 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BLLLLc')
98 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BBLLLLc')
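
Assuming constants.INDEX_HEADER packs the 4-byte big-endian version field ('>I'), the updated format string yields a 23-byte fixed header; note that a big-endian 'L' occupies 4 bytes in struct, so each size field takes 4 bytes on disk. A quick hedged check:

    import struct

    # '>I' assumed for constants.INDEX_HEADER (4-byte big-endian version);
    # then two uuid-size bytes, four 4-byte 'L' size fields, one 'c' byte
    S_HEADER_SKETCH = struct.Struct('>I' + 'BBLLLLc')
    assert S_HEADER_SKETCH.size == 4 + 1 + 1 + 4 * 4 + 1  # 23 bytes
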
98
99
99
100
100 class RevlogDocket(object):
101 class RevlogDocket(object):
101 """metadata associated with revlog"""
102 """metadata associated with revlog"""
102
103
103 def __init__(
104 def __init__(
104 self,
105 self,
105 revlog,
106 revlog,
106 use_pending=False,
107 use_pending=False,
107 version_header=None,
108 version_header=None,
108 index_uuid=None,
109 index_uuid=None,
110 data_uuid=None,
109 index_end=0,
111 index_end=0,
110 pending_index_end=0,
112 pending_index_end=0,
111 data_end=0,
113 data_end=0,
112 pending_data_end=0,
114 pending_data_end=0,
113 default_compression_header=None,
115 default_compression_header=None,
114 ):
116 ):
115 self._version_header = version_header
117 self._version_header = version_header
116 self._read_only = bool(use_pending)
118 self._read_only = bool(use_pending)
117 self._dirty = False
119 self._dirty = False
118 self._radix = revlog.radix
120 self._radix = revlog.radix
119 self._path = revlog._docket_file
121 self._path = revlog._docket_file
120 self._opener = revlog.opener
122 self._opener = revlog.opener
121 self._index_uuid = index_uuid
123 self._index_uuid = index_uuid
124 self._data_uuid = data_uuid
122 # these asserts should be True as long as we have a single index filename
125 # these asserts should be True as long as we have a single index filename
123 assert index_end <= pending_index_end
126 assert index_end <= pending_index_end
124 assert data_end <= pending_data_end
127 assert data_end <= pending_data_end
125 self._initial_index_end = index_end
128 self._initial_index_end = index_end
126 self._pending_index_end = pending_index_end
129 self._pending_index_end = pending_index_end
127 self._initial_data_end = data_end
130 self._initial_data_end = data_end
128 self._pending_data_end = pending_data_end
131 self._pending_data_end = pending_data_end
129 if use_pending:
132 if use_pending:
130 self._index_end = self._pending_index_end
133 self._index_end = self._pending_index_end
131 self._data_end = self._pending_data_end
134 self._data_end = self._pending_data_end
132 else:
135 else:
133 self._index_end = self._initial_index_end
136 self._index_end = self._initial_index_end
134 self._data_end = self._initial_data_end
137 self._data_end = self._initial_data_end
135 self.default_compression_header = default_compression_header
138 self.default_compression_header = default_compression_header
136
139
137 def index_filepath(self):
140 def index_filepath(self):
138 """file path to the current index file associated to this docket"""
141 """file path to the current index file associated to this docket"""
139 # very simplistic version at first
142 # very simplistic version at first
140 if self._index_uuid is None:
143 if self._index_uuid is None:
141 self._index_uuid = make_uid()
144 self._index_uuid = make_uid()
142 return b"%s-%s.idx" % (self._radix, self._index_uuid)
145 return b"%s-%s.idx" % (self._radix, self._index_uuid)
143
146
147 def data_filepath(self):
148 """file path to the current index file associated to this docket"""
149 # very simplistic version at first
150 if self._data_uuid is None:
151 self._data_uuid = make_uid()
152 return b"%s-%s.dat" % (self._radix, self._data_uuid)
153
144 @property
154 @property
145 def index_end(self):
155 def index_end(self):
146 return self._index_end
156 return self._index_end
147
157
148 @index_end.setter
158 @index_end.setter
149 def index_end(self, new_size):
159 def index_end(self, new_size):
150 if new_size != self._index_end:
160 if new_size != self._index_end:
151 self._index_end = new_size
161 self._index_end = new_size
152 self._dirty = True
162 self._dirty = True
153
163
154 @property
164 @property
155 def data_end(self):
165 def data_end(self):
156 return self._data_end
166 return self._data_end
157
167
158 @data_end.setter
168 @data_end.setter
159 def data_end(self, new_size):
169 def data_end(self, new_size):
160 if new_size != self._data_end:
170 if new_size != self._data_end:
161 self._data_end = new_size
171 self._data_end = new_size
162 self._dirty = True
172 self._dirty = True
163
173
164 def write(self, transaction, pending=False, stripping=False):
174 def write(self, transaction, pending=False, stripping=False):
165 """write the modification of disk if any
175 """write the modification of disk if any
166
176
167 This makes the new content visible to all processes"""
177 This makes the new content visible to all processes"""
168 if not self._dirty:
178 if not self._dirty:
169 return False
179 return False
170 else:
180 else:
171 if self._read_only:
181 if self._read_only:
172 msg = b'writing read-only docket: %s'
182 msg = b'writing read-only docket: %s'
173 msg %= self._path
183 msg %= self._path
174 raise error.ProgrammingError(msg)
184 raise error.ProgrammingError(msg)
175 if not stripping:
185 if not stripping:
176 # XXX we could leverage the docket while stripping. However it
186 # XXX we could leverage the docket while stripping. However it
177 # is not powerful enough at the time of this comment
187 # is not powerful enough at the time of this comment
178 transaction.addbackup(self._path, location=b'store')
188 transaction.addbackup(self._path, location=b'store')
179 with self._opener(self._path, mode=b'w', atomictemp=True) as f:
189 with self._opener(self._path, mode=b'w', atomictemp=True) as f:
180 f.write(self._serialize(pending=pending))
190 f.write(self._serialize(pending=pending))
181 # if pending, we still need to write the final data eventually
191 # if pending, we still need to write the final data eventually
182 self._dirty = pending
192 self._dirty = pending
183 return True
193 return True
184
194
185 def _serialize(self, pending=False):
195 def _serialize(self, pending=False):
186 if pending:
196 if pending:
187 official_index_end = self._initial_index_end
197 official_index_end = self._initial_index_end
188 official_data_end = self._initial_data_end
198 official_data_end = self._initial_data_end
189 else:
199 else:
190 official_index_end = self._index_end
200 official_index_end = self._index_end
191 official_data_end = self._data_end
201 official_data_end = self._data_end
192
202
193 # this assert should be True as long as we have a single index filename
203 # this assert should be True as long as we have a single index filename
194 assert official_data_end <= self._data_end
204 assert official_data_end <= self._data_end
195 data = (
205 data = (
196 self._version_header,
206 self._version_header,
197 len(self._index_uuid),
207 len(self._index_uuid),
208 len(self._data_uuid),
198 official_index_end,
209 official_index_end,
199 self._index_end,
210 self._index_end,
200 official_data_end,
211 official_data_end,
201 self._data_end,
212 self._data_end,
202 self.default_compression_header,
213 self.default_compression_header,
203 )
214 )
204 s = []
215 s = []
205 s.append(S_HEADER.pack(*data))
216 s.append(S_HEADER.pack(*data))
206 s.append(self._index_uuid)
217 s.append(self._index_uuid)
218 s.append(self._data_uuid)
207 return b''.join(s)
219 return b''.join(s)
208
220
209
221
210 def default_docket(revlog, version_header):
222 def default_docket(revlog, version_header):
211 """given a revlog version a new docket object for the given revlog"""
223 """given a revlog version a new docket object for the given revlog"""
212 rl_version = version_header & 0xFFFF
224 rl_version = version_header & 0xFFFF
213 if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
225 if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
214 return None
226 return None
215 comp = util.compengines[revlog._compengine].revlogheader()
227 comp = util.compengines[revlog._compengine].revlogheader()
216 docket = RevlogDocket(
228 docket = RevlogDocket(
217 revlog,
229 revlog,
218 version_header=version_header,
230 version_header=version_header,
219 default_compression_header=comp,
231 default_compression_header=comp,
220 )
232 )
221 docket._dirty = True
233 docket._dirty = True
222 return docket
234 return docket
223
235
224
236
225 def parse_docket(revlog, data, use_pending=False):
237 def parse_docket(revlog, data, use_pending=False):
226 """given some docket data return a docket object for the given revlog"""
238 """given some docket data return a docket object for the given revlog"""
227 header = S_HEADER.unpack(data[: S_HEADER.size])
239 header = S_HEADER.unpack(data[: S_HEADER.size])
228 offset = S_HEADER.size
240 offset = S_HEADER.size
229 version_header = header[0]
241 version_header = header[0]
230 index_uuid_size = header[1]
242 index_uuid_size = header[1]
231 index_uuid = data[offset : offset + index_uuid_size]
243 index_uuid = data[offset : offset + index_uuid_size]
232 offset += index_uuid_size
244 offset += index_uuid_size
233 index_size = header[2]
245 data_uuid_size = header[2]
234 pending_index_size = header[3]
246 data_uuid = data[offset : offset + data_uuid_size]
235 data_size = header[4]
247 offset += data_uuid_size
236 pending_data_size = header[5]
248 index_size = header[3]
237 default_compression_header = header[6]
249 pending_index_size = header[4]
250 data_size = header[5]
251 pending_data_size = header[6]
252 default_compression_header = header[7]
238 docket = RevlogDocket(
253 docket = RevlogDocket(
239 revlog,
254 revlog,
240 use_pending=use_pending,
255 use_pending=use_pending,
241 version_header=version_header,
256 version_header=version_header,
242 index_uuid=index_uuid,
257 index_uuid=index_uuid,
258 data_uuid=data_uuid,
243 index_end=index_size,
259 index_end=index_size,
244 pending_index_end=pending_index_size,
260 pending_index_end=pending_index_size,
245 data_end=data_size,
261 data_end=data_size,
246 pending_data_end=pending_data_size,
262 pending_data_end=pending_data_size,
247 default_compression_header=default_compression_header,
263 default_compression_header=default_compression_header,
248 )
264 )
249 return docket
265 return docket
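To make the round trip above concrete, here is a self-contained sketch of the serialize/parse pair. The real S_HEADER struct is defined elsewhere in docket.py and does not appear in this hunk, so the format string below is an assumption chosen only to mirror the eight header fields (version, the two uuid sizes, the four end offsets, the compression header); it is not the actual on-disk layout:

import struct

S_HEADER = struct.Struct('>IBBQQQQc')  # assumed layout, not Mercurial's

def serialize(version, index_uuid, data_uuid, index_end, data_end, comp):
    # no pending write in this sketch, so official ends == current ends
    header = S_HEADER.pack(
        version,
        len(index_uuid),
        len(data_uuid),
        index_end,
        index_end,
        data_end,
        data_end,
        comp,
    )
    return header + index_uuid + data_uuid

def parse(data):
    header = S_HEADER.unpack(data[: S_HEADER.size])
    offset = S_HEADER.size
    index_uuid = data[offset : offset + header[1]]
    offset += header[1]
    data_uuid = data[offset : offset + header[2]]
    return header[0], index_uuid, data_uuid, header[3], header[5]

raw = serialize(0xDEAD, b'88698448', b'6b8ab34b', 64, 11, b'u')
assert parse(raw) == (0xDEAD, b'88698448', b'6b8ab34b', 64, 11)
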
@@ -1,814 +1,823 @@
 # store.py - repository store handling for Mercurial
 #
 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import errno
 import functools
 import os
 import re
 import stat

 from .i18n import _
 from .pycompat import getattr
 from .node import hex
 from . import (
     changelog,
     error,
     manifest,
     policy,
     pycompat,
     util,
     vfs as vfsmod,
 )
 from .utils import hashutil

 parsers = policy.importmod('parsers')
 # how many bytes should be read from fncache in one read
 # It is done to prevent loading large fncache files into memory
 fncache_chunksize = 10 ** 6


 def _matchtrackedpath(path, matcher):
     """parses a fncache entry and returns whether the entry is tracking a path
     matched by matcher or not.

     If matcher is None, returns True"""

     if matcher is None:
         return True
     path = decodedir(path)
     if path.startswith(b'data/'):
         return matcher(path[len(b'data/') : -len(b'.i')])
     elif path.startswith(b'meta/'):
         return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])

     raise error.ProgrammingError(b"cannot decode path %s" % path)


 # This avoids a collision between a file named foo and a dir named
 # foo.i or foo.d
 def _encodedir(path):
     """
     >>> _encodedir(b'data/foo.i')
     'data/foo.i'
     >>> _encodedir(b'data/foo.i/bla.i')
     'data/foo.i.hg/bla.i'
     >>> _encodedir(b'data/foo.i.hg/bla.i')
     'data/foo.i.hg.hg/bla.i'
     >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
     'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
     """
     return (
         path.replace(b".hg/", b".hg.hg/")
         .replace(b".i/", b".i.hg/")
         .replace(b".d/", b".d.hg/")
     )


 encodedir = getattr(parsers, 'encodedir', _encodedir)


 def decodedir(path):
     """
     >>> decodedir(b'data/foo.i')
     'data/foo.i'
     >>> decodedir(b'data/foo.i.hg/bla.i')
     'data/foo.i/bla.i'
     >>> decodedir(b'data/foo.i.hg.hg/bla.i')
     'data/foo.i.hg/bla.i'
     """
     if b".hg/" not in path:
         return path
     return (
         path.replace(b".d.hg/", b".d/")
         .replace(b".i.hg/", b".i/")
         .replace(b".hg.hg/", b".hg/")
     )


 def _reserved():
     """characters that are problematic for filesystems

     * ascii escapes (0..31)
     * ascii hi (126..255)
     * windows specials

     these characters will be escaped by encodefunctions
     """
     winreserved = [ord(x) for x in u'\\:*?"<>|']
     for x in range(32):
         yield x
     for x in range(126, 256):
         yield x
     for x in winreserved:
         yield x


 def _buildencodefun():
     """
     >>> enc, dec = _buildencodefun()

     >>> enc(b'nothing/special.txt')
     'nothing/special.txt'
     >>> dec(b'nothing/special.txt')
     'nothing/special.txt'

     >>> enc(b'HELLO')
     '_h_e_l_l_o'
     >>> dec(b'_h_e_l_l_o')
     'HELLO'

     >>> enc(b'hello:world?')
     'hello~3aworld~3f'
     >>> dec(b'hello~3aworld~3f')
     'hello:world?'

     >>> enc(b'the\\x07quick\\xADshot')
     'the~07quick~adshot'
     >>> dec(b'the~07quick~adshot')
     'the\\x07quick\\xadshot'
     """
     e = b'_'
     xchr = pycompat.bytechr
     asciistr = list(map(xchr, range(127)))
     capitals = list(range(ord(b"A"), ord(b"Z") + 1))

     cmap = {x: x for x in asciistr}
     for x in _reserved():
         cmap[xchr(x)] = b"~%02x" % x
     for x in capitals + [ord(e)]:
         cmap[xchr(x)] = e + xchr(x).lower()

     dmap = {}
     for k, v in pycompat.iteritems(cmap):
         dmap[v] = k

     def decode(s):
         i = 0
         while i < len(s):
             for l in pycompat.xrange(1, 4):
                 try:
                     yield dmap[s[i : i + l]]
                     i += l
                     break
                 except KeyError:
                     pass
             else:
                 raise KeyError

     return (
         lambda s: b''.join(
             [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
         ),
         lambda s: b''.join(list(decode(s))),
     )


 _encodefname, _decodefname = _buildencodefun()


 def encodefilename(s):
     """
     >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
     'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
     """
     return _encodefname(encodedir(s))


 def decodefilename(s):
     """
     >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
     'foo.i/bar.d/bla.hg/hi:world?/HELLO'
     """
     return decodedir(_decodefname(s))


 def _buildlowerencodefun():
     """
     >>> f = _buildlowerencodefun()
     >>> f(b'nothing/special.txt')
     'nothing/special.txt'
     >>> f(b'HELLO')
     'hello'
     >>> f(b'hello:world?')
     'hello~3aworld~3f'
     >>> f(b'the\\x07quick\\xADshot')
     'the~07quick~adshot'
     """
     xchr = pycompat.bytechr
     cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
     for x in _reserved():
         cmap[xchr(x)] = b"~%02x" % x
     for x in range(ord(b"A"), ord(b"Z") + 1):
         cmap[xchr(x)] = xchr(x).lower()

     def lowerencode(s):
         return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

     return lowerencode


 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
 _winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
 _winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)


 def _auxencode(path, dotencode):
     """
     Encodes filenames containing names reserved by Windows or which end in
     period or space. Does not touch other single reserved characters c.
     Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
     Additionally encodes space or period at the beginning, if dotencode is
     True. Parameter path is assumed to be all lowercase.
     A segment only needs encoding if a reserved name appears as a
     basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
     doesn't need encoding.

     >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
     >>> _auxencode(s.split(b'/'), True)
     ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
     >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
     >>> _auxencode(s.split(b'/'), False)
     ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
     >>> _auxencode([b'foo. '], True)
     ['foo.~20']
     >>> _auxencode([b' .foo'], True)
     ['~20.foo']
     """
     for i, n in enumerate(path):
         if not n:
             continue
         if dotencode and n[0] in b'. ':
             n = b"~%02x" % ord(n[0:1]) + n[1:]
             path[i] = n
         else:
             l = n.find(b'.')
             if l == -1:
                 l = len(n)
             if (l == 3 and n[:3] in _winres3) or (
                 l == 4
                 and n[3:4] <= b'9'
                 and n[3:4] >= b'1'
                 and n[:3] in _winres4
             ):
                 # encode third letter ('aux' -> 'au~78')
                 ec = b"~%02x" % ord(n[2:3])
                 n = n[0:2] + ec + n[3:]
                 path[i] = n
         if n[-1] in b'. ':
             # encode last period or space ('foo...' -> 'foo..~2e')
             path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
     return path


 _maxstorepathlen = 120
 _dirprefixlen = 8
 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


 def _hashencode(path, dotencode):
     digest = hex(hashutil.sha1(path).digest())
     le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
     parts = _auxencode(le, dotencode)
     basename = parts[-1]
     _root, ext = os.path.splitext(basename)
     sdirs = []
     sdirslen = 0
     for p in parts[:-1]:
         d = p[:_dirprefixlen]
         if d[-1] in b'. ':
             # Windows can't access dirs ending in period or space
             d = d[:-1] + b'_'
         if sdirslen == 0:
             t = len(d)
         else:
             t = sdirslen + 1 + len(d)
             if t > _maxshortdirslen:
                 break
         sdirs.append(d)
         sdirslen = t
     dirs = b'/'.join(sdirs)
     if len(dirs) > 0:
         dirs += b'/'
     res = b'dh/' + dirs + digest + ext
     spaceleft = _maxstorepathlen - len(res)
     if spaceleft > 0:
         filler = basename[:spaceleft]
         res = b'dh/' + dirs + filler + digest + ext
     return res


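_hashencode() above has no doctest, so here is a toy rendition of what it produces for an over-long path. Assumptions: plain hashlib.sha1 stands in for Mercurial's hashutil, and the lowerencode/_auxencode passes are skipped, so the output only approximates the real encoding:

import hashlib
import os.path

_maxstorepathlen = 120
_dirprefixlen = 8

def toy_hashencode(path):
    digest = hashlib.sha1(path).hexdigest().encode('ascii')
    parts = path[len(b'data/') :].split(b'/')
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]
    dirs = b'/'.join(p[:_dirprefixlen] for p in parts[:-1])
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with the start of the basename, as _hashencode does
        res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res

print(toy_hashencode(b'data/' + b'x' * 150 + b'.i'))  # dh/xxx...<sha1>.i
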
 def _hybridencode(path, dotencode):
     """encodes path with a length limit

     Encodes all paths that begin with 'data/', according to the following.

     Default encoding (reversible):

     Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
     characters are encoded as '~xx', where xx is the two digit hex code
     of the character (see encodefilename).
     Relevant path components consisting of Windows reserved filenames are
     masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

     Hashed encoding (not reversible):

     If the default-encoded path is longer than _maxstorepathlen, a
     non-reversible hybrid hashing of the path is done instead.
     This encoding uses up to _dirprefixlen characters of all directory
     levels of the lowerencoded path, but not more levels than can fit into
     _maxshortdirslen.
     Then follows the filler followed by the sha digest of the full path.
     The filler is the beginning of the basename of the lowerencoded path
     (the basename is everything after the last path separator). The filler
     is as long as possible, filling in characters from the basename until
     the encoded path has _maxstorepathlen characters (or all chars of the
     basename have been taken).
     The extension (e.g. '.i' or '.d') is preserved.

     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
     encoding was used.
     """
     path = encodedir(path)
     ef = _encodefname(path).split(b'/')
     res = b'/'.join(_auxencode(ef, dotencode))
     if len(res) > _maxstorepathlen:
         res = _hashencode(path, dotencode)
     return res


 def _pathencode(path):
     de = encodedir(path)
     if len(path) > _maxstorepathlen:
         return _hashencode(de, True)
     ef = _encodefname(de).split(b'/')
     res = b'/'.join(_auxencode(ef, True))
     if len(res) > _maxstorepathlen:
         return _hashencode(de, True)
     return res


 _pathencode = getattr(parsers, 'pathencode', _pathencode)


 def _plainhybridencode(f):
     return _hybridencode(f, False)


 def _calcmode(vfs):
     try:
         # files in .hg/ will be created using this mode
         mode = vfs.stat().st_mode
         # avoid some useless chmods
         if (0o777 & ~util.umask) == (0o777 & mode):
             mode = None
     except OSError:
         mode = None
     return mode


 _data = [
     b'bookmarks',
     b'narrowspec',
     b'data',
     b'meta',
     b'00manifest.d',
     b'00manifest.i',
     b'00changelog.d',
     b'00changelog.i',
     b'phaseroots',
     b'obsstore',
     b'requires',
 ]

 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
-REVLOG_FILES_OTHER_EXT = (b'.idx', b'.d', b'.n', b'.nd', b'd.tmpcensored')
+REVLOG_FILES_OTHER_EXT = (
+    b'.idx',
+    b'.d',
+    b'.dat',
+    b'.n',
+    b'.nd',
+    b'd.tmpcensored',
+)
 # files that are "volatile" and might change between listing and streaming
 #
 # note: the ".nd" files are nodemap data and won't "change" but they might be
 # deleted.
 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

 # some exceptions to the above matching
 EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$')


 def is_revlog(f, kind, st):
     if kind != stat.S_IFREG:
         return None
     return revlog_type(f)


 def revlog_type(f):
     if f.endswith(REVLOG_FILES_MAIN_EXT) and EXCLUDED.match(f) is None:
         return FILEFLAGS_REVLOG_MAIN
     elif f.endswith(REVLOG_FILES_OTHER_EXT) and EXCLUDED.match(f) is None:
         t = FILETYPE_FILELOG_OTHER
         if f.endswith(REVLOG_FILES_VOLATILE_EXT):
             t |= FILEFLAGS_VOLATILE
         return t
+    return None


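A condensed, standalone version of the suffix matching above, with the flag constants (defined just below in this file) inlined. It simplifies the real function — the EXCLUDED regex is ignored and FILEFLAGS_REVLOG_OTHER stands in for FILETYPE_FILELOG_OTHER — but shows where the new ".dat" suffix fits:

FILEFLAGS_REVLOG_MAIN = 1 << 1
FILEFLAGS_REVLOG_OTHER = 1 << 0
FILEFLAGS_VOLATILE = 1 << 20

MAIN_EXT = (b'.i',)
OTHER_EXT = (b'.idx', b'.d', b'.dat', b'.n', b'.nd')
VOLATILE_EXT = (b'.n', b'.nd')

def classify(f):
    if f.endswith(MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(OTHER_EXT):
        t = FILEFLAGS_REVLOG_OTHER
        if f.endswith(VOLATILE_EXT):
            t |= FILEFLAGS_VOLATILE
        return t
    return None

assert classify(b'00changelog.i') == FILEFLAGS_REVLOG_MAIN
assert classify(b'00changelog-6b8ab34b.dat') == FILEFLAGS_REVLOG_OTHER
assert classify(b'00changelog.n') == FILEFLAGS_REVLOG_OTHER | FILEFLAGS_VOLATILE
assert classify(b'foo.txt') is None
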
 # the file is part of changelog data
 FILEFLAGS_CHANGELOG = 1 << 13
 # the file is part of manifest data
 FILEFLAGS_MANIFESTLOG = 1 << 12
 # the file is part of filelog data
 FILEFLAGS_FILELOG = 1 << 11
 # files that are not directly part of a revlog
 FILEFLAGS_OTHER = 1 << 10

 # the main entry point for a revlog
 FILEFLAGS_REVLOG_MAIN = 1 << 1
 # a secondary file for a revlog
 FILEFLAGS_REVLOG_OTHER = 1 << 0

 # files that are "volatile" and might change between listing and streaming
 FILEFLAGS_VOLATILE = 1 << 20

 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_OTHER = FILEFLAGS_OTHER


 class basicstore(object):
     '''base class for local repository stores'''

     def __init__(self, path, vfstype):
         vfs = vfstype(path)
         self.path = vfs.base
         self.createmode = _calcmode(vfs)
         vfs.createmode = self.createmode
         self.rawvfs = vfs
         self.vfs = vfsmod.filtervfs(vfs, encodedir)
         self.opener = self.vfs

     def join(self, f):
         return self.path + b'/' + encodedir(f)

     def _walk(self, relpath, recurse):
         '''yields (unencoded, encoded, size)'''
         path = self.path
         if relpath:
             path += b'/' + relpath
         striplen = len(self.path) + 1
         l = []
         if self.rawvfs.isdir(path):
             visit = [path]
             readdir = self.rawvfs.readdir
             while visit:
                 p = visit.pop()
                 for f, kind, st in readdir(p, stat=True):
                     fp = p + b'/' + f
                     rl_type = is_revlog(f, kind, st)
                     if rl_type is not None:
                         n = util.pconvert(fp[striplen:])
                         l.append((rl_type, decodedir(n), n, st.st_size))
                     elif kind == stat.S_IFDIR and recurse:
                         visit.append(fp)
         l.sort()
         return l

     def changelog(self, trypending, concurrencychecker=None):
         return changelog.changelog(
             self.vfs,
             trypending=trypending,
             concurrencychecker=concurrencychecker,
         )

     def manifestlog(self, repo, storenarrowmatch):
         rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
         return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

     def datafiles(self, matcher=None):
         files = self._walk(b'data', True) + self._walk(b'meta', True)
         for (t, u, e, s) in files:
             yield (FILEFLAGS_FILELOG | t, u, e, s)

     def topfiles(self):
         # yield manifest before changelog
         files = reversed(self._walk(b'', False))
         for (t, u, e, s) in files:
             if u.startswith(b'00changelog'):
                 yield (FILEFLAGS_CHANGELOG | t, u, e, s)
             elif u.startswith(b'00manifest'):
                 yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
             else:
                 yield (FILETYPE_OTHER | t, u, e, s)

     def walk(self, matcher=None):
         """return files related to data storage (i.e. revlogs)

         yields (file_type, unencoded, encoded, size)

         if a matcher is passed, only storage files of those tracked paths
         that match the matcher are yielded
         """
         # yield data files first
         for x in self.datafiles(matcher):
             yield x
         for x in self.topfiles():
             yield x

     def copylist(self):
         return _data

     def write(self, tr):
         pass

     def invalidatecaches(self):
         pass

     def markremoved(self, fn):
         pass

     def __contains__(self, path):
         '''Checks if the store contains path'''
         path = b"/".join((b"data", path))
         # file?
         if self.vfs.exists(path + b".i"):
             return True
         # dir?
         if not path.endswith(b"/"):
             path = path + b"/"
         return self.vfs.exists(path)


 class encodedstore(basicstore):
     def __init__(self, path, vfstype):
         vfs = vfstype(path + b'/store')
         self.path = vfs.base
         self.createmode = _calcmode(vfs)
         vfs.createmode = self.createmode
         self.rawvfs = vfs
         self.vfs = vfsmod.filtervfs(vfs, encodefilename)
         self.opener = self.vfs

     def datafiles(self, matcher=None):
         for t, a, b, size in super(encodedstore, self).datafiles():
             try:
                 a = decodefilename(a)
             except KeyError:
                 a = None
             if a is not None and not _matchtrackedpath(a, matcher):
                 continue
             yield t, a, b, size

     def join(self, f):
         return self.path + b'/' + encodefilename(f)

     def copylist(self):
         return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]


 class fncache(object):
     # the filename used to be partially encoded
     # hence the encodedir/decodedir dance
     def __init__(self, vfs):
         self.vfs = vfs
         self.entries = None
         self._dirty = False
         # set of new additions to fncache
         self.addls = set()

     def ensureloaded(self, warn=None):
         """read the fncache file if not already read.

         If the file on disk is corrupted, raise. If warn is provided,
         warn and keep going instead."""
         if self.entries is None:
             self._load(warn)

     def _load(self, warn=None):
         '''fill the entries from the fncache file'''
         self._dirty = False
         try:
             fp = self.vfs(b'fncache', mode=b'rb')
         except IOError:
             # skip nonexistent file
             self.entries = set()
             return

         self.entries = set()
         chunk = b''
         for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
             chunk += c
             try:
                 p = chunk.rindex(b'\n')
                 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                 chunk = chunk[p + 1 :]
             except ValueError:
                 # substring '\n' not found, maybe the entry is bigger than the
                 # chunksize, so let's keep iterating
                 pass

         if chunk:
             msg = _(b"fncache does not end with a newline")
             if warn:
                 warn(msg + b'\n')
             else:
                 raise error.Abort(
                     msg,
                     hint=_(
                         b"use 'hg debugrebuildfncache' to "
                         b"rebuild the fncache"
                     ),
                 )
         self._checkentries(fp, warn)
         fp.close()

     def _checkentries(self, fp, warn):
         """make sure there is no empty string in entries"""
         if b'' in self.entries:
             fp.seek(0)
             for n, line in enumerate(util.iterfile(fp)):
                 if not line.rstrip(b'\n'):
                     t = _(b'invalid entry in fncache, line %d') % (n + 1)
                     if warn:
                         warn(t + b'\n')
                     else:
                         raise error.Abort(t)

     def write(self, tr):
         if self._dirty:
             assert self.entries is not None
             self.entries = self.entries | self.addls
             self.addls = set()
             tr.addbackup(b'fncache')
             fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
             if self.entries:
                 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
             fp.close()
             self._dirty = False
         if self.addls:
             # if we have just new entries, let's append them to the fncache
             tr.addbackup(b'fncache')
             fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
             if self.addls:
                 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
             fp.close()
             self.entries = None
             self.addls = set()

     def add(self, fn):
         if self.entries is None:
             self._load()
         if fn not in self.entries:
             self.addls.add(fn)

     def remove(self, fn):
         if self.entries is None:
             self._load()
         if fn in self.addls:
             self.addls.remove(fn)
             return
         try:
             self.entries.remove(fn)
             self._dirty = True
         except KeyError:
             pass

     def __contains__(self, fn):
         if fn in self.addls:
             return True
         if self.entries is None:
             self._load()
         return fn in self.entries

     def __iter__(self):
         if self.entries is None:
             self._load()
         return iter(self.entries | self.addls)


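The chunked read in _load() above is worth isolating: it scans fixed-size chunks for the last newline so that at most one partial entry is ever buffered, no matter how large the fncache grows. A stripped-down, runnable version of that loop (read_lines_chunked is a name made up for this sketch; the 10 ** 6 default mirrors fncache_chunksize from the top of the file):

import functools
import io

def read_lines_chunked(fp, chunksize=10 ** 6):
    entries = set()
    chunk = b''
    for c in iter(functools.partial(fp.read, chunksize), b''):
        chunk += c
        try:
            p = chunk.rindex(b'\n')
            entries.update(chunk[: p + 1].splitlines())
            chunk = chunk[p + 1 :]
        except ValueError:
            # no newline yet: the entry is longer than the chunk, keep reading
            pass
    if chunk:
        raise ValueError('file does not end with a newline')
    return entries

fp = io.BytesIO(b'data/foo.i\ndata/bar.d\n')
assert read_lines_chunked(fp) == {b'data/foo.i', b'data/bar.d'}
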
 class _fncachevfs(vfsmod.proxyvfs):
     def __init__(self, vfs, fnc, encode):
         vfsmod.proxyvfs.__init__(self, vfs)
         self.fncache = fnc
         self.encode = encode

     def __call__(self, path, mode=b'r', *args, **kw):
         encoded = self.encode(path)
         if mode not in (b'r', b'rb') and (
             path.startswith(b'data/') or path.startswith(b'meta/')
         ):
             # do not trigger a fncache load when adding a file that already is
             # known to exist.
             notload = self.fncache.entries is None and self.vfs.exists(encoded)
             if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                 # when appending to an existing file, if the file has size zero,
                 # it should be considered as missing. Such zero-size files are
                 # the result of truncation when a transaction is aborted.
                 notload = False
             if not notload:
                 self.fncache.add(path)
         return self.vfs(encoded, mode, *args, **kw)

     def join(self, path):
         if path:
             return self.vfs.join(self.encode(path))
         else:
             return self.vfs.join(path)


 class fncachestore(basicstore):
     def __init__(self, path, vfstype, dotencode):
         if dotencode:
             encode = _pathencode
         else:
             encode = _plainhybridencode
         self.encode = encode
         vfs = vfstype(path + b'/store')
         self.path = vfs.base
         self.pathsep = self.path + b'/'
         self.createmode = _calcmode(vfs)
         vfs.createmode = self.createmode
         self.rawvfs = vfs
         fnc = fncache(vfs)
         self.fncache = fnc
         self.vfs = _fncachevfs(vfs, fnc, encode)
         self.opener = self.vfs

     def join(self, f):
         return self.pathsep + self.encode(f)

     def getsize(self, path):
         return self.rawvfs.stat(path).st_size

     def datafiles(self, matcher=None):
         for f in sorted(self.fncache):
             if not _matchtrackedpath(f, matcher):
                 continue
             ef = self.encode(f)
             try:
                 t = revlog_type(f)
+                assert t is not None, f
                 t |= FILEFLAGS_FILELOG
                 yield t, f, ef, self.getsize(ef)
             except OSError as err:
                 if err.errno != errno.ENOENT:
                     raise

     def copylist(self):
         d = (
             b'bookmarks',
             b'narrowspec',
             b'data',
             b'meta',
             b'dh',
             b'fncache',
             b'phaseroots',
             b'obsstore',
             b'00manifest.d',
             b'00manifest.i',
             b'00changelog.d',
             b'00changelog.i',
             b'requires',
         )
         return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

     def write(self, tr):
         self.fncache.write(tr)

     def invalidatecaches(self):
         self.fncache.entries = None
         self.fncache.addls = set()

     def markremoved(self, fn):
         self.fncache.remove(fn)

     def _exists(self, f):
         ef = self.encode(f)
         try:
             self.getsize(ef)
             return True
         except OSError as err:
             if err.errno != errno.ENOENT:
                 raise
             # nonexistent entry
             return False

     def __contains__(self, path):
         '''Checks if the store contains path'''
         path = b"/".join((b"data", path))
         # check for files (exact match)
         e = path + b'.i'
         if e in self.fncache and self._exists(e):
             return True
         # now check for directories (prefix match)
         if not path.endswith(b'/'):
             path += b'/'
         for e in self.fncache:
             if e.startswith(path) and self._exists(e):
                 return True
         return False
@@ -1,85 +1,86 @@
 #require reporevlogstore

 A repo with unknown revlogv2 requirement string cannot be opened

   $ hg init invalidreq
   $ cd invalidreq
   $ echo exp-revlogv2.unknown >> .hg/requires
   $ hg log
   abort: repository requires features unknown to this Mercurial: exp-revlogv2.unknown
   (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
   [255]
   $ cd ..

 Can create and open repo with revlog v2 requirement

   $ cat >> $HGRCPATH << EOF
   > [experimental]
   > revlogv2 = enable-unstable-format-and-corrupt-my-data
   > EOF

   $ hg init empty-repo
   $ cd empty-repo
   $ cat .hg/requires
   dotencode
   exp-dirstate-v2 (dirstate-v2 !)
   exp-revlogv2.2
   fncache
   generaldelta
   persistent-nodemap (rust !)
   revlog-compression-zstd (zstd !)
   sparserevlog
   store

   $ hg log

 Unknown flags to revlog are rejected

   >>> with open('.hg/store/00changelog.i', 'wb') as fh:
   ...     fh.write(b'\xff\x00\xde\xad') and None

   $ hg log
   abort: unknown flags (0xff00) in version 57005 revlog 00changelog
   [50]

   $ cd ..

 Writing a simple revlog v2 works

   $ hg init simple
   $ cd simple
   $ touch foo
   $ hg -q commit -A -m initial

   $ hg log
   changeset:   0:96ee1d7354c4
   tag:         tip
   user:        test
   date:        Thu Jan 01 00:00:00 1970 +0000
   summary:     initial

+
 Header written as expected

   $ f --hexdump --bytes 4 .hg/store/00changelog.i
   .hg/store/00changelog.i:
   0000: 00 00 de ad |....|

   $ f --hexdump --bytes 4 .hg/store/data/foo.i
   .hg/store/data/foo.i:
   0000: 00 00 de ad |....|

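The two hexdumps above show the same 4-byte header on the changelog and the filelog. It decodes as two big-endian 16-bit halves: a flags field (0x0000) followed by the version (0xdead, i.e. 57005, the placeholder version used while revlogv2 is experimental) — which also matches the "unknown flags (0xff00) in version 57005" error provoked earlier in this test:

import struct

flags, version = struct.unpack('>HH', b'\x00\x00\xde\xad')
assert (flags, version) == (0, 0xDEAD)  # 0xDEAD == 57005
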
 The expected files are generated
 --------------------------------

 We should have:
 - a docket
 - an index file with a unique name
 - a data file

   $ ls .hg/store/00changelog* .hg/store/00manifest*
-  .hg/store/00changelog-b870a51b.idx
-  .hg/store/00changelog.d
+  .hg/store/00changelog-6b8ab34b.dat
+  .hg/store/00changelog-88698448.idx
   .hg/store/00changelog.i
-  .hg/store/00manifest-88698448.idx
-  .hg/store/00manifest.d
+  .hg/store/00manifest-1335303a.dat
+  .hg/store/00manifest-b875dfc5.idx
   .hg/store/00manifest.i