stream-clone: avoid opening a revlog in case we do not need it...
Arseniy Alekseyev
r51565:3b563954 default
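The change below adds a static `is_inline_index` helper to the `revlog` class, so stream-clone code can decide whether a revlog keeps its data inline in the `.i` file by inspecting only the first four bytes of the index, without constructing a revlog object. A minimal sketch of the intended use, with `opener` and `index_path` as hypothetical stand-ins for the caller's file access:

    from mercurial import revlog

    def needs_separate_datafile(opener, index_path):
        # hypothetical helper, not part of the patch: inline revlogs keep
        # their data inside the ".i" file, so only non-inline revlogs also
        # have a ".d" file that a stream clone would need to send
        with opener(index_path) as fp:
            header_bytes = fp.read(4)
        return not revlog.revlog.is_inline_index(header_bytes)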
@@ -1,3501 +1,3511 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Report, from the first bytes of an index file, whether this
        revlog is inline (i.e. stores its data interleaved with the index)."""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        that test, debug, or performance measurement code might not set this
        to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as a flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

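    # Editor's illustration (not part of the original file): with a 64 KiB
    # threshold, a large index file is mmapped while a small one is read
    # outright, and a missing file yields the empty string:
    #
    #   rl._get_data(rl._indexfile, 65536)       # mmap when size >= 64 KiB
    #   rl._get_data(rl._indexfile, None)        # always a plain read()
    #   rl._get_data(b'no-such-file', None)      # returns b''
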
    def get_streams(self, max_linkrev, force_inline=False):
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self._datafp() as fp_d:
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

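    # Illustrative sketch (editor's note, not part of the original file): a
    # stream-clone producer consumes ``get_streams()`` roughly like this,
    # where ``emit_entry`` and ``emit_chunk`` stand in for whatever the
    # bundling layer actually does:
    #
    #   for name, stream, size in rl.get_streams(max_linkrev):
    #       emit_entry(name, size)     # announce file name and byte count
    #       for chunk in stream:       # raw bytes, already capped at `size`
    #           emit_chunk(chunk)
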
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
            entry_point = b'%s.i.s' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    def get_revlog(self):
        """simple function to mirror API of other not-really-revlog API"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
930 def start(self, rev):
940 def start(self, rev):
931 return int(self.index[rev][0] >> 16)
941 return int(self.index[rev][0] >> 16)
932
942
933 def sidedata_cut_off(self, rev):
943 def sidedata_cut_off(self, rev):
934 sd_cut_off = self.index[rev][8]
944 sd_cut_off = self.index[rev][8]
935 if sd_cut_off != 0:
945 if sd_cut_off != 0:
936 return sd_cut_off
946 return sd_cut_off
937 # This is some annoying dance, because entries without sidedata
947 # This is some annoying dance, because entries without sidedata
938 # currently use 0 as their ofsset. (instead of previous-offset +
948 # currently use 0 as their ofsset. (instead of previous-offset +
939 # previous-size)
949 # previous-size)
940 #
950 #
941 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
951 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
942 # In the meantime, we need this.
952 # In the meantime, we need this.
943 while 0 <= rev:
953 while 0 <= rev:
944 e = self.index[rev]
954 e = self.index[rev]
945 if e[9] != 0:
955 if e[9] != 0:
946 return e[8] + e[9]
956 return e[8] + e[9]
947 rev -= 1
957 rev -= 1
948 return 0
958 return 0
949
959
950 def flags(self, rev):
960 def flags(self, rev):
951 return self.index[rev][0] & 0xFFFF
961 return self.index[rev][0] & 0xFFFF
952
962
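    # Worked example (hypothetical entry value, not from the original file):
    # the first index field packs the 6-byte data offset and the 2-byte
    # flags into one integer as ``(offset << 16) | flags``, which is exactly
    # what ``start()`` and ``flags()`` unpack above:
    #
    #   >>> packed = (3 << 16) | 0
    #   >>> packed >> 16, packed & 0xFFFF
    #   (3, 0)
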
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

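    # Worked example (hypothetical graph): in a linear history ``0 <- 1 <- 2``
    # the rank of revision 2 is 3, since ``ancestors(2) = {0, 1, 2}`` once 2
    # itself is included. A merge whose two parents are independent roots,
    # each of rank 1, would likewise have rank 3.
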
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

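    # Worked example (hypothetical index contents): if entry field 3 (the
    # delta base) gives ``index[5][3] == 2`` and ``index[2][3] == 2``, the
    # walk above follows 5 -> 2, stops once the base points at itself, and
    # ``chainbase(5)`` returns 2 (revision 2 being stored as a full
    # snapshot rather than a delta).
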
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

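    # Worked example (hypothetical chain): for a three-link chain
    # 0 (snapshot) <- 1 <- 2 with compressed sizes 100, 20 and 30 bytes,
    # ``_chaininfo(2)`` returns ``(2, 150)``: two deltas to apply, and 150
    # bytes to read and decompress, base text included.
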
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

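    # Worked example (hypothetical chain, generaldelta): with bases
    # ``index[7][3] == 4`` and ``index[4][3] == 4`` (4 is a snapshot),
    # ``_deltachain(7)`` returns ``([4, 7], False)``, while
    # ``_deltachain(7, stoprev=4)`` returns ``([7], True)``.
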
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

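    # Worked example (hypothetical graph): with a linear history
    # ``0 <- 1 <- 2 <- 3``, ``findcommonmissing(common=[node(1)])`` yields a
    # ``has`` set covering revisions ``{nullrev, 0, 1}`` and the missing
    # list ``[node(2), node(3)]``, i.e. ``(::heads) - (::common)`` in
    # revset terms.
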
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads.
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

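    # Worked example (hypothetical graph): with a history ``0 <- 1 <- 2``,
    # ``nodesbetween(roots=[node(1)], heads=[node(2)])`` returns
    # ``([node(1), node(2)], [node(1)], [node(2)])``: the topological path,
    # the roots actually reachable, and the heads actually reached.
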
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

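    # Note on the sizing above (illustrative sketch): the extra slot in
    # ``ishead`` absorbs writes for nullrev parents, since ``ishead[-1] = 0``
    # lands on index ``count``. E.g. for two revisions where 1's parent is 0:
    #
    #   >>> ishead = [0] * 3
    #   >>> ishead[0] = 1; ishead[-1] = 0  # rev 0, parents (nullrev, nullrev)
    #   >>> ishead[1] = 1; ishead[0] = 0   # rev 1, parent 0
    #   >>> [r for r, val in enumerate(ishead) if val]
    #   [1]
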
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

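    # Note on the ``a > b`` shortcut above (illustrative): revlogs are
    # append-only, so a revision's number is always greater than its
    # parents'. A rev numbered above ``b`` therefore cannot be one of
    # ``b``'s ancestors, and the expensive reachableroots walk is skipped.
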
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

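    # Usage sketch (hypothetical revlog ``rl`` with 20-byte hashes):
    # ``_match`` accepts several id forms and returns a binary node, or None
    # when nothing matched:
    #
    #   rl._match(0)            # -> node of revision 0
    #   rl._match(b'-1')        # -> node of the last revision
    #   rl._match(b'<40 hex>')  # -> the matching binary node, if present
    #
    # Anything else falls through to ``_partialmatch`` via ``lookup()``.
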
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids, as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

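    # Worked example (hypothetical): if no other node shares the first hex
    # digit of ``node``, ``shortest(node)`` returns that single digit; if
    # another node shares the first three digits, four are returned. A
    # prefix made only of ``f`` characters is lengthened further so it can
    # never be confused with the virtual working-directory id ``ffff...``.
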
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

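    # Worked example (hypothetical inline revlog with 64-byte index entries):
    # revision 0's data of length 10 sits at data-offset 0 but at file
    # offset ``0 + (0 + 1) * 64 = 64``, just past its own index entry;
    # revision 1's data then starts at file offset ``10 + (1 + 1) * 64 =
    # 138``, which is the adjustment the ``self._inline`` branch above
    # performs.
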
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
1861 msg %= comp_mode
1871 msg %= comp_mode
1862 raise error.RevlogError(msg)
1872 raise error.RevlogError(msg)
1863
1873
1864 return l
1874 return l
1865
1875
1866 def deltaparent(self, rev):
1876 def deltaparent(self, rev):
1867 """return deltaparent of the given revision"""
1877 """return deltaparent of the given revision"""
1868 base = self.index[rev][3]
1878 base = self.index[rev][3]
1869 if base == rev:
1879 if base == rev:
1870 return nullrev
1880 return nullrev
1871 elif self._generaldelta:
1881 elif self._generaldelta:
1872 return base
1882 return base
1873 else:
1883 else:
1874 return rev - 1
1884 return rev - 1
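
    # Illustration: with general delta, ``index[rev][3]`` records the delta
    # parent itself, so a delta may be based on either parent or another
    # earlier revision. In the legacy layout deltas always apply on top of
    # ``rev - 1``, and the base field instead records the first revision of
    # the delta chain; ``base == rev`` means the revision is stored as a
    # full text in both layouts.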

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
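
    # Background for the logic above: under sparse-revlog, a "snapshot" is
    # either a full text or a revision whose delta applies to another
    # snapshot instead of one of its parents; such snapshot chains are what
    # keep delta chains short. The parent walks skip empty deltas so that a
    # delta against a zero-length alias of a parent still counts as a delta
    # against that parent.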

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(
                b'revision %d not a snapshot' % rev
            )
        return len(self._deltachain(rev)[0]) - 1
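
    # Worked example: a level-0 snapshot (full text) has a delta chain of
    # just itself, hence depth 0; a snapshot whose delta applies to a
    # level-0 snapshot has a two-element chain and depth 1, and so on.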

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future to make the code more
        efficient/lazy.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)
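
    # For reference, ``storageutil.hashrevisionsha1`` derives the node from
    # the sorted parents followed by the text, roughly (a simplified sketch;
    # the real helper special-cases a null parent for speed):
    #
    #     sha1(min(p1, p2) + max(p1, p2) + text).digest()
    #
    # Sorting makes the node independent of parent order.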

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._indexfile + b'.s'
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield
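
    # A minimal usage sketch (``rl`` is an illustrative revlog instance):
    # keeping the handles open amortizes the open/close cost across reads.
    #
    #     with rl.reading():
    #         texts = [rl.revision(r) for r in rl]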

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                        transaction.add(
                            self._sidedatafile, self._docket.sidedata_end
                        )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)
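
    # ``compress`` and ``decompress`` round-trip: the stored chunk is just
    # ``header + data``. A sketch of the invariant (names illustrative):
    #
    #     h, c = self.compress(rawchunk)
    #     assert bytes(self.decompress(h + c)) == rawchunk
    #
    # ``h`` is ``b'u'`` for uncompressed chunks and empty when the compressor
    # embedded its own header (e.g. zlib's leading ``b'x'``).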

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before it is changed by flag processors, which
            # is the non-raw size. use revlog explicitly to avoid filelog's
            # extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # from ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
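
        # The rank of a revision is the size of its set of ancestors, itself
        # included. For a merge, the pure-Python fallback above computes it
        # roughly as: 1 (the new revision) + rank(pmax) + the number of
        # ancestors of pmin that are not already ancestors of pmax.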
2650
2660
2651 e = revlogutils.entry(
2661 e = revlogutils.entry(
2652 flags=flags,
2662 flags=flags,
2653 data_offset=offset,
2663 data_offset=offset,
2654 data_compressed_length=deltainfo.deltalen,
2664 data_compressed_length=deltainfo.deltalen,
2655 data_uncompressed_length=textlen,
2665 data_uncompressed_length=textlen,
2656 data_compression_mode=compression_mode,
2666 data_compression_mode=compression_mode,
2657 data_delta_base=deltainfo.base,
2667 data_delta_base=deltainfo.base,
2658 link_rev=link,
2668 link_rev=link,
2659 parent_rev_1=p1r,
2669 parent_rev_1=p1r,
2660 parent_rev_2=p2r,
2670 parent_rev_2=p2r,
2661 node_id=node,
2671 node_id=node,
2662 sidedata_offset=sidedata_offset,
2672 sidedata_offset=sidedata_offset,
2663 sidedata_compressed_length=len(serialized_sidedata),
2673 sidedata_compressed_length=len(serialized_sidedata),
2664 sidedata_compression_mode=sidedata_compression_mode,
2674 sidedata_compression_mode=sidedata_compression_mode,
2665 rank=rank,
2675 rank=rank,
2666 )
2676 )
2667
2677
2668 self.index.append(e)
2678 self.index.append(e)
2669 entry = self.index.entry_binary(curr)
2679 entry = self.index.entry_binary(curr)
2670 if curr == 0 and self._docket is None:
2680 if curr == 0 and self._docket is None:
2671 header = self._format_flags | self._format_version
2681 header = self._format_flags | self._format_version
2672 header = self.index.pack_header(header)
2682 header = self.index.pack_header(header)
2673 entry = header + entry
2683 entry = header + entry
2674 self._writeentry(
2684 self._writeentry(
2675 transaction,
2685 transaction,
2676 entry,
2686 entry,
2677 deltainfo.data,
2687 deltainfo.data,
2678 link,
2688 link,
2679 offset,
2689 offset,
2680 serialized_sidedata,
2690 serialized_sidedata,
2681 sidedata_offset,
2691 sidedata_offset,
2682 )
2692 )
2683
2693
2684 rawtext = btext[0]
2694 rawtext = btext[0]
2685
2695
2686 if alwayscache and rawtext is None:
2696 if alwayscache and rawtext is None:
2687 rawtext = deltacomputer.buildtext(revinfo, fh)
2697 rawtext = deltacomputer.buildtext(revinfo, fh)
2688
2698
2689 if type(rawtext) == bytes: # only accept immutable objects
2699 if type(rawtext) == bytes: # only accept immutable objects
2690 self._revisioncache = (node, curr, rawtext)
2700 self._revisioncache = (node, curr, rawtext)
2691 self._chainbasecache[curr] = deltainfo.chainbase
2701 self._chainbasecache[curr] = deltainfo.chainbase
2692 return curr
2702 return curr
2693
2703
2694 def _get_data_offset(self, prev):
2704 def _get_data_offset(self, prev):
2695 """Returns the current offset in the (in-transaction) data file.
2705 """Returns the current offset in the (in-transaction) data file.
2696 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2706 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2697 file to store that information: since sidedata can be rewritten to the
2707 file to store that information: since sidedata can be rewritten to the
2698 end of the data file within a transaction, you can have cases where, for
2708 end of the data file within a transaction, you can have cases where, for
2699 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2709 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2700 to `n - 1`'s sidedata being written after `n`'s data.
2710 to `n - 1`'s sidedata being written after `n`'s data.
2701
2711
2702 TODO cache this in a docket file before getting out of experimental."""
2712 TODO cache this in a docket file before getting out of experimental."""
2703 if self._docket is None:
2713 if self._docket is None:
2704 return self.end(prev)
2714 return self.end(prev)
2705 else:
2715 else:
2706 return self._docket.data_end
2716 return self._docket.data_end
2707
2717
2708 def _writeentry(
2718 def _writeentry(
2709 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2719 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2710 ):
2720 ):
2711 # Files opened in a+ mode have inconsistent behavior on various
2721 # Files opened in a+ mode have inconsistent behavior on various
2712 # platforms. Windows requires that a file positioning call be made
2722 # platforms. Windows requires that a file positioning call be made
2713 # when the file handle transitions between reads and writes. See
2723 # when the file handle transitions between reads and writes. See
2714 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2724 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2715 # platforms, Python or the platform itself can be buggy. Some versions
2725 # platforms, Python or the platform itself can be buggy. Some versions
2716 # of Solaris have been observed to not append at the end of the file
2726 # of Solaris have been observed to not append at the end of the file
2717 # if the file was seeked to a position before the end. See issue4943 for more.
2727 # if the file was seeked to a position before the end. See issue4943 for more.
2718 #
2728 #
2719 # We work around this issue by inserting a seek() before writing.
2729 # We work around this issue by inserting a seek() before writing.
2720 # Note: This is likely not necessary on Python 3. However, because
2730 # Note: This is likely not necessary on Python 3. However, because
2721 # the file handle is reused for reads and may be seeked there, we need
2731 # the file handle is reused for reads and may be seeked there, we need
2722 # to be careful before changing this.
2732 # to be careful before changing this.
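# A standalone sketch of the workaround described above, assuming a
# hypothetical scratch file opened in 'a+b' mode (this is not revlog code):
#
#     with open(scratch_path, 'a+b') as fh:
#         fh.read()                # the handle was last used for a read...
#         fh.seek(0, os.SEEK_END)  # ...so reposition explicitly before writing
#         fh.write(b'new entry')   # the append now reliably lands at EOF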
2723 if self._writinghandles is None:
2733 if self._writinghandles is None:
2724 msg = b'adding revision outside `revlog._writing` context'
2734 msg = b'adding revision outside `revlog._writing` context'
2725 raise error.ProgrammingError(msg)
2735 raise error.ProgrammingError(msg)
2726 ifh, dfh, sdfh = self._writinghandles
2736 ifh, dfh, sdfh = self._writinghandles
2727 if self._docket is None:
2737 if self._docket is None:
2728 ifh.seek(0, os.SEEK_END)
2738 ifh.seek(0, os.SEEK_END)
2729 else:
2739 else:
2730 ifh.seek(self._docket.index_end, os.SEEK_SET)
2740 ifh.seek(self._docket.index_end, os.SEEK_SET)
2731 if dfh:
2741 if dfh:
2732 if self._docket is None:
2742 if self._docket is None:
2733 dfh.seek(0, os.SEEK_END)
2743 dfh.seek(0, os.SEEK_END)
2734 else:
2744 else:
2735 dfh.seek(self._docket.data_end, os.SEEK_SET)
2745 dfh.seek(self._docket.data_end, os.SEEK_SET)
2736 if sdfh:
2746 if sdfh:
2737 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2747 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2738
2748
2739 curr = len(self) - 1
2749 curr = len(self) - 1
2740 if not self._inline:
2750 if not self._inline:
2741 transaction.add(self._datafile, offset)
2751 transaction.add(self._datafile, offset)
2742 if self._sidedatafile:
2752 if self._sidedatafile:
2743 transaction.add(self._sidedatafile, sidedata_offset)
2753 transaction.add(self._sidedatafile, sidedata_offset)
2744 transaction.add(self._indexfile, curr * len(entry))
2754 transaction.add(self._indexfile, curr * len(entry))
2745 if data[0]:
2755 if data[0]:
2746 dfh.write(data[0])
2756 dfh.write(data[0])
2747 dfh.write(data[1])
2757 dfh.write(data[1])
2748 if sidedata:
2758 if sidedata:
2749 sdfh.write(sidedata)
2759 sdfh.write(sidedata)
2750 ifh.write(entry)
2760 ifh.write(entry)
2751 else:
2761 else:
2752 offset += curr * self.index.entry_size
2762 offset += curr * self.index.entry_size
2753 transaction.add(self._indexfile, offset)
2763 transaction.add(self._indexfile, offset)
2754 ifh.write(entry)
2764 ifh.write(entry)
2755 ifh.write(data[0])
2765 ifh.write(data[0])
2756 ifh.write(data[1])
2766 ifh.write(data[1])
2757 assert not sidedata
2767 assert not sidedata
2758 self._enforceinlinesize(transaction)
2768 self._enforceinlinesize(transaction)
2759 if self._docket is not None:
2769 if self._docket is not None:
2760 # revlog-v2 always has 3 writing handles, help Pytype
2770 # revlog-v2 always has 3 writing handles, help Pytype
2761 wh1 = self._writinghandles[0]
2771 wh1 = self._writinghandles[0]
2762 wh2 = self._writinghandles[1]
2772 wh2 = self._writinghandles[1]
2763 wh3 = self._writinghandles[2]
2773 wh3 = self._writinghandles[2]
2764 assert wh1 is not None
2774 assert wh1 is not None
2765 assert wh2 is not None
2775 assert wh2 is not None
2766 assert wh3 is not None
2776 assert wh3 is not None
2767 self._docket.index_end = wh1.tell()
2777 self._docket.index_end = wh1.tell()
2768 self._docket.data_end = wh2.tell()
2778 self._docket.data_end = wh2.tell()
2769 self._docket.sidedata_end = wh3.tell()
2779 self._docket.sidedata_end = wh3.tell()
2770
2780
2771 nodemaputil.setup_persistent_nodemap(transaction, self)
2781 nodemaputil.setup_persistent_nodemap(transaction, self)
2772
2782
2773 def addgroup(
2783 def addgroup(
2774 self,
2784 self,
2775 deltas,
2785 deltas,
2776 linkmapper,
2786 linkmapper,
2777 transaction,
2787 transaction,
2778 alwayscache=False,
2788 alwayscache=False,
2779 addrevisioncb=None,
2789 addrevisioncb=None,
2780 duplicaterevisioncb=None,
2790 duplicaterevisioncb=None,
2781 debug_info=None,
2791 debug_info=None,
2782 delta_base_reuse_policy=None,
2792 delta_base_reuse_policy=None,
2783 ):
2793 ):
2784 """
2794 """
2785 Add a delta group.
2795 Add a delta group.
2786
2796
2787 Given a set of deltas, add them to the revision log. The
2797 Given a set of deltas, add them to the revision log. The
2788 first delta is against its parent, which should be in our
2798 first delta is against its parent, which should be in our
2789 log; the rest are against the previous delta.
2799 log; the rest are against the previous delta.
2790
2800
2791 If ``addrevisioncb`` is defined, it will be called with arguments of
2801 If ``addrevisioncb`` is defined, it will be called with arguments of
2792 this revlog and the node that was added.
2802 this revlog and the node that was added.
2793 """
2803 """
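# An illustrative sketch of the shape each item of ``deltas`` takes, based on
# the unpacking below (the names are placeholders, not real repository data):
#
#     (node,       # binary node id of the revision being added
#      p1, p2,     # binary node ids of its parents
#      linknode,   # node whose revision the linkrev should point at
#      deltabase,  # node the delta applies against
#      delta,      # the delta payload itself
#      flags,      # revision flags, or 0 to get REVIDX_DEFAULT_FLAGS
#      sidedata)   # sidedata mapping for the revision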
2794
2804
2795 if self._adding_group:
2805 if self._adding_group:
2796 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2806 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2797
2807
2798 # read the default delta-base reuse policy from revlog config if the
2808 # read the default delta-base reuse policy from revlog config if the
2799 # group did not specify one.
2809 # group did not specify one.
2800 if delta_base_reuse_policy is None:
2810 if delta_base_reuse_policy is None:
2801 if self._generaldelta and self._lazydeltabase:
2811 if self._generaldelta and self._lazydeltabase:
2802 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2812 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2803 else:
2813 else:
2804 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2814 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2805
2815
2806 self._adding_group = True
2816 self._adding_group = True
2807 empty = True
2817 empty = True
2808 try:
2818 try:
2809 with self._writing(transaction):
2819 with self._writing(transaction):
2810 write_debug = None
2820 write_debug = None
2811 if self._debug_delta:
2821 if self._debug_delta:
2812 write_debug = transaction._report
2822 write_debug = transaction._report
2813 deltacomputer = deltautil.deltacomputer(
2823 deltacomputer = deltautil.deltacomputer(
2814 self,
2824 self,
2815 write_debug=write_debug,
2825 write_debug=write_debug,
2816 debug_info=debug_info,
2826 debug_info=debug_info,
2817 )
2827 )
2818 # loop through our set of deltas
2828 # loop through our set of deltas
2819 for data in deltas:
2829 for data in deltas:
2820 (
2830 (
2821 node,
2831 node,
2822 p1,
2832 p1,
2823 p2,
2833 p2,
2824 linknode,
2834 linknode,
2825 deltabase,
2835 deltabase,
2826 delta,
2836 delta,
2827 flags,
2837 flags,
2828 sidedata,
2838 sidedata,
2829 ) = data
2839 ) = data
2830 link = linkmapper(linknode)
2840 link = linkmapper(linknode)
2831 flags = flags or REVIDX_DEFAULT_FLAGS
2841 flags = flags or REVIDX_DEFAULT_FLAGS
2832
2842
2833 rev = self.index.get_rev(node)
2843 rev = self.index.get_rev(node)
2834 if rev is not None:
2844 if rev is not None:
2835 # this can happen if two branches make the same change
2845 # this can happen if two branches make the same change
2836 self._nodeduplicatecallback(transaction, rev)
2846 self._nodeduplicatecallback(transaction, rev)
2837 if duplicaterevisioncb:
2847 if duplicaterevisioncb:
2838 duplicaterevisioncb(self, rev)
2848 duplicaterevisioncb(self, rev)
2839 empty = False
2849 empty = False
2840 continue
2850 continue
2841
2851
2842 for p in (p1, p2):
2852 for p in (p1, p2):
2843 if not self.index.has_node(p):
2853 if not self.index.has_node(p):
2844 raise error.LookupError(
2854 raise error.LookupError(
2845 p, self.radix, _(b'unknown parent')
2855 p, self.radix, _(b'unknown parent')
2846 )
2856 )
2847
2857
2848 if not self.index.has_node(deltabase):
2858 if not self.index.has_node(deltabase):
2849 raise error.LookupError(
2859 raise error.LookupError(
2850 deltabase, self.display_id, _(b'unknown delta base')
2860 deltabase, self.display_id, _(b'unknown delta base')
2851 )
2861 )
2852
2862
2853 baserev = self.rev(deltabase)
2863 baserev = self.rev(deltabase)
2854
2864
2855 if baserev != nullrev and self.iscensored(baserev):
2865 if baserev != nullrev and self.iscensored(baserev):
2856 # if base is censored, delta must be full replacement in a
2866 # if base is censored, delta must be full replacement in a
2857 # single patch operation
2867 # single patch operation
2858 hlen = struct.calcsize(b">lll")
2868 hlen = struct.calcsize(b">lll")
2859 oldlen = self.rawsize(baserev)
2869 oldlen = self.rawsize(baserev)
2860 newlen = len(delta) - hlen
2870 newlen = len(delta) - hlen
2861 if delta[:hlen] != mdiff.replacediffheader(
2871 if delta[:hlen] != mdiff.replacediffheader(
2862 oldlen, newlen
2872 oldlen, newlen
2863 ):
2873 ):
2864 raise error.CensoredBaseError(
2874 raise error.CensoredBaseError(
2865 self.display_id, self.node(baserev)
2875 self.display_id, self.node(baserev)
2866 )
2876 )
2867
2877
2868 if not flags and self._peek_iscensored(baserev, delta):
2878 if not flags and self._peek_iscensored(baserev, delta):
2869 flags |= REVIDX_ISCENSORED
2879 flags |= REVIDX_ISCENSORED
2870
2880
2871 # We assume consumers of addrevisioncb will want to retrieve
2881 # We assume consumers of addrevisioncb will want to retrieve
2872 # the added revision, which will require a call to
2882 # the added revision, which will require a call to
2873 # revision(). revision() will fast path if there is a cache
2883 # revision(). revision() will fast path if there is a cache
2874 # hit. So, we tell _addrevision() to always cache in this case.
2884 # hit. So, we tell _addrevision() to always cache in this case.
2875 # We're only using addgroup() in the context of changegroup
2885 # We're only using addgroup() in the context of changegroup
2876 # generation so the revision data can always be handled as raw
2886 # generation so the revision data can always be handled as raw
2877 # by the flagprocessor.
2887 # by the flagprocessor.
2878 rev = self._addrevision(
2888 rev = self._addrevision(
2879 node,
2889 node,
2880 None,
2890 None,
2881 transaction,
2891 transaction,
2882 link,
2892 link,
2883 p1,
2893 p1,
2884 p2,
2894 p2,
2885 flags,
2895 flags,
2886 (baserev, delta, delta_base_reuse_policy),
2896 (baserev, delta, delta_base_reuse_policy),
2887 alwayscache=alwayscache,
2897 alwayscache=alwayscache,
2888 deltacomputer=deltacomputer,
2898 deltacomputer=deltacomputer,
2889 sidedata=sidedata,
2899 sidedata=sidedata,
2890 )
2900 )
2891
2901
2892 if addrevisioncb:
2902 if addrevisioncb:
2893 addrevisioncb(self, rev)
2903 addrevisioncb(self, rev)
2894 empty = False
2904 empty = False
2895 finally:
2905 finally:
2896 self._adding_group = False
2906 self._adding_group = False
2897 return not empty
2907 return not empty
2898
2908
2899 def iscensored(self, rev):
2909 def iscensored(self, rev):
2900 """Check if a file revision is censored."""
2910 """Check if a file revision is censored."""
2901 if not self._censorable:
2911 if not self._censorable:
2902 return False
2912 return False
2903
2913
2904 return self.flags(rev) & REVIDX_ISCENSORED
2914 return self.flags(rev) & REVIDX_ISCENSORED
2905
2915
2906 def _peek_iscensored(self, baserev, delta):
2916 def _peek_iscensored(self, baserev, delta):
2907 """Quickly check if a delta produces a censored revision."""
2917 """Quickly check if a delta produces a censored revision."""
2908 if not self._censorable:
2918 if not self._censorable:
2909 return False
2919 return False
2910
2920
2911 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2921 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2912
2922
2913 def getstrippoint(self, minlink):
2923 def getstrippoint(self, minlink):
2914 """find the minimum rev that must be stripped to strip the linkrev
2924 """find the minimum rev that must be stripped to strip the linkrev
2915
2925
2916 Returns a tuple containing the minimum rev and a set of all revs that
2926 Returns a tuple containing the minimum rev and a set of all revs that
2917 have linkrevs that will be broken by this strip.
2927 have linkrevs that will be broken by this strip.
2918 """
2928 """
2919 return storageutil.resolvestripinfo(
2929 return storageutil.resolvestripinfo(
2920 minlink,
2930 minlink,
2921 len(self) - 1,
2931 len(self) - 1,
2922 self.headrevs(),
2932 self.headrevs(),
2923 self.linkrev,
2933 self.linkrev,
2924 self.parentrevs,
2934 self.parentrevs,
2925 )
2935 )
2926
2936
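# An illustrative sketch of how getstrippoint() and strip() relate, assuming
# hypothetical ``rl``, ``minlink`` and ``tr``:
#
#     rev, broken = rl.getstrippoint(minlink)
#     # rl.strip(minlink, tr) truncates everything from ``rev`` onward;
#     # ``broken`` holds the revs whose linkrevs that truncation breaks.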
2927 def strip(self, minlink, transaction):
2937 def strip(self, minlink, transaction):
2928 """truncate the revlog on the first revision with a linkrev >= minlink
2938 """truncate the revlog on the first revision with a linkrev >= minlink
2929
2939
2930 This function is called when we're stripping revision minlink and
2940 This function is called when we're stripping revision minlink and
2931 its descendants from the repository.
2941 its descendants from the repository.
2932
2942
2933 We have to remove all revisions with linkrev >= minlink, because
2943 We have to remove all revisions with linkrev >= minlink, because
2934 the equivalent changelog revisions will be renumbered after the
2944 the equivalent changelog revisions will be renumbered after the
2935 strip.
2945 strip.
2936
2946
2937 So we truncate the revlog on the first of these revisions, and
2947 So we truncate the revlog on the first of these revisions, and
2938 trust that the caller has saved the revisions that shouldn't be
2948 trust that the caller has saved the revisions that shouldn't be
2939 removed and that it'll re-add them after this truncation.
2949 removed and that it'll re-add them after this truncation.
2940 """
2950 """
2941 if len(self) == 0:
2951 if len(self) == 0:
2942 return
2952 return
2943
2953
2944 rev, _ = self.getstrippoint(minlink)
2954 rev, _ = self.getstrippoint(minlink)
2945 if rev == len(self):
2955 if rev == len(self):
2946 return
2956 return
2947
2957
2948 # first truncate the files on disk
2958 # first truncate the files on disk
2949 data_end = self.start(rev)
2959 data_end = self.start(rev)
2950 if not self._inline:
2960 if not self._inline:
2951 transaction.add(self._datafile, data_end)
2961 transaction.add(self._datafile, data_end)
2952 end = rev * self.index.entry_size
2962 end = rev * self.index.entry_size
2953 else:
2963 else:
2954 end = data_end + (rev * self.index.entry_size)
2964 end = data_end + (rev * self.index.entry_size)
2955
2965
2956 if self._sidedatafile:
2966 if self._sidedatafile:
2957 sidedata_end = self.sidedata_cut_off(rev)
2967 sidedata_end = self.sidedata_cut_off(rev)
2958 transaction.add(self._sidedatafile, sidedata_end)
2968 transaction.add(self._sidedatafile, sidedata_end)
2959
2969
2960 transaction.add(self._indexfile, end)
2970 transaction.add(self._indexfile, end)
2961 if self._docket is not None:
2971 if self._docket is not None:
2962 # XXX we could leverage the docket while stripping. However, it is
2972 # XXX we could leverage the docket while stripping. However, it is
2963 # not powerful enough at the time of this comment
2973 # not powerful enough at the time of this comment
2964 self._docket.index_end = end
2974 self._docket.index_end = end
2965 self._docket.data_end = data_end
2975 self._docket.data_end = data_end
2966 self._docket.sidedata_end = sidedata_end
2976 self._docket.sidedata_end = sidedata_end
2967 self._docket.write(transaction, stripping=True)
2977 self._docket.write(transaction, stripping=True)
2968
2978
2969 # then reset internal state in memory to forget those revisions
2979 # then reset internal state in memory to forget those revisions
2970 self._revisioncache = None
2980 self._revisioncache = None
2971 self._chaininfocache = util.lrucachedict(500)
2981 self._chaininfocache = util.lrucachedict(500)
2972 self._segmentfile.clear_cache()
2982 self._segmentfile.clear_cache()
2973 self._segmentfile_sidedata.clear_cache()
2983 self._segmentfile_sidedata.clear_cache()
2974
2984
2975 del self.index[rev:-1]
2985 del self.index[rev:-1]
2976
2986
2977 def checksize(self):
2987 def checksize(self):
2978 """Check size of index and data files
2988 """Check size of index and data files
2979
2989
2980 Return a (dd, di) tuple.
2990 Return a (dd, di) tuple.
2981 - dd: extra bytes for the "data" file
2991 - dd: extra bytes for the "data" file
2982 - di: extra bytes for the "index" file
2992 - di: extra bytes for the "index" file
2983
2993
2984 A healthy revlog will return (0, 0).
2994 A healthy revlog will return (0, 0).
2985 """
2995 """
2986 expected = 0
2996 expected = 0
2987 if len(self):
2997 if len(self):
2988 expected = max(0, self.end(len(self) - 1))
2998 expected = max(0, self.end(len(self) - 1))
2989
2999
2990 try:
3000 try:
2991 with self._datafp() as f:
3001 with self._datafp() as f:
2992 f.seek(0, io.SEEK_END)
3002 f.seek(0, io.SEEK_END)
2993 actual = f.tell()
3003 actual = f.tell()
2994 dd = actual - expected
3004 dd = actual - expected
2995 except FileNotFoundError:
3005 except FileNotFoundError:
2996 dd = 0
3006 dd = 0
2997
3007
2998 try:
3008 try:
2999 f = self.opener(self._indexfile)
3009 f = self.opener(self._indexfile)
3000 f.seek(0, io.SEEK_END)
3010 f.seek(0, io.SEEK_END)
3001 actual = f.tell()
3011 actual = f.tell()
3002 f.close()
3012 f.close()
3003 s = self.index.entry_size
3013 s = self.index.entry_size
3004 i = max(0, actual // s)
3014 i = max(0, actual // s)
3005 di = actual - (i * s)
3015 di = actual - (i * s)
3006 if self._inline:
3016 if self._inline:
3007 databytes = 0
3017 databytes = 0
3008 for r in self:
3018 for r in self:
3009 databytes += max(0, self.length(r))
3019 databytes += max(0, self.length(r))
3010 dd = 0
3020 dd = 0
3011 di = actual - len(self) * s - databytes
3021 di = actual - len(self) * s - databytes
3012 except FileNotFoundError:
3022 except FileNotFoundError:
3013 di = 0
3023 di = 0
3014
3024
3015 return (dd, di)
3025 return (dd, di)
3016
3026
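# A usage sketch, assuming ``rl`` is a hypothetical open revlog:
#
#     dd, di = rl.checksize()
#     if (dd, di) != (0, 0):
#         print('%d stray data bytes, %d stray index bytes' % (dd, di))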
3017 def files(self):
3027 def files(self):
3018 res = [self._indexfile]
3028 res = [self._indexfile]
3019 if self._docket_file is None:
3029 if self._docket_file is None:
3020 if not self._inline:
3030 if not self._inline:
3021 res.append(self._datafile)
3031 res.append(self._datafile)
3022 else:
3032 else:
3023 res.append(self._docket_file)
3033 res.append(self._docket_file)
3024 res.extend(self._docket.old_index_filepaths(include_empty=False))
3034 res.extend(self._docket.old_index_filepaths(include_empty=False))
3025 if self._docket.data_end:
3035 if self._docket.data_end:
3026 res.append(self._datafile)
3036 res.append(self._datafile)
3027 res.extend(self._docket.old_data_filepaths(include_empty=False))
3037 res.extend(self._docket.old_data_filepaths(include_empty=False))
3028 if self._docket.sidedata_end:
3038 if self._docket.sidedata_end:
3029 res.append(self._sidedatafile)
3039 res.append(self._sidedatafile)
3030 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3040 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3031 return res
3041 return res
3032
3042
3033 def emitrevisions(
3043 def emitrevisions(
3034 self,
3044 self,
3035 nodes,
3045 nodes,
3036 nodesorder=None,
3046 nodesorder=None,
3037 revisiondata=False,
3047 revisiondata=False,
3038 assumehaveparentrevisions=False,
3048 assumehaveparentrevisions=False,
3039 deltamode=repository.CG_DELTAMODE_STD,
3049 deltamode=repository.CG_DELTAMODE_STD,
3040 sidedata_helpers=None,
3050 sidedata_helpers=None,
3041 debug_info=None,
3051 debug_info=None,
3042 ):
3052 ):
3043 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3053 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3044 raise error.ProgrammingError(
3054 raise error.ProgrammingError(
3045 b'unhandled value for nodesorder: %s' % nodesorder
3055 b'unhandled value for nodesorder: %s' % nodesorder
3046 )
3056 )
3047
3057
3048 if nodesorder is None and not self._generaldelta:
3058 if nodesorder is None and not self._generaldelta:
3049 nodesorder = b'storage'
3059 nodesorder = b'storage'
3050
3060
3051 if (
3061 if (
3052 not self._storedeltachains
3062 not self._storedeltachains
3053 and deltamode != repository.CG_DELTAMODE_PREV
3063 and deltamode != repository.CG_DELTAMODE_PREV
3054 ):
3064 ):
3055 deltamode = repository.CG_DELTAMODE_FULL
3065 deltamode = repository.CG_DELTAMODE_FULL
3056
3066
3057 return storageutil.emitrevisions(
3067 return storageutil.emitrevisions(
3058 self,
3068 self,
3059 nodes,
3069 nodes,
3060 nodesorder,
3070 nodesorder,
3061 revlogrevisiondelta,
3071 revlogrevisiondelta,
3062 deltaparentfn=self.deltaparent,
3072 deltaparentfn=self.deltaparent,
3063 candeltafn=self.candelta,
3073 candeltafn=self.candelta,
3064 rawsizefn=self.rawsize,
3074 rawsizefn=self.rawsize,
3065 revdifffn=self.revdiff,
3075 revdifffn=self.revdiff,
3066 flagsfn=self.flags,
3076 flagsfn=self.flags,
3067 deltamode=deltamode,
3077 deltamode=deltamode,
3068 revisiondata=revisiondata,
3078 revisiondata=revisiondata,
3069 assumehaveparentrevisions=assumehaveparentrevisions,
3079 assumehaveparentrevisions=assumehaveparentrevisions,
3070 sidedata_helpers=sidedata_helpers,
3080 sidedata_helpers=sidedata_helpers,
3071 debug_info=debug_info,
3081 debug_info=debug_info,
3072 )
3082 )
3073
3083
3074 DELTAREUSEALWAYS = b'always'
3084 DELTAREUSEALWAYS = b'always'
3075 DELTAREUSESAMEREVS = b'samerevs'
3085 DELTAREUSESAMEREVS = b'samerevs'
3076 DELTAREUSENEVER = b'never'
3086 DELTAREUSENEVER = b'never'
3077
3087
3078 DELTAREUSEFULLADD = b'fulladd'
3088 DELTAREUSEFULLADD = b'fulladd'
3079
3089
3080 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3090 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3081
3091
3082 def clone(
3092 def clone(
3083 self,
3093 self,
3084 tr,
3094 tr,
3085 destrevlog,
3095 destrevlog,
3086 addrevisioncb=None,
3096 addrevisioncb=None,
3087 deltareuse=DELTAREUSESAMEREVS,
3097 deltareuse=DELTAREUSESAMEREVS,
3088 forcedeltabothparents=None,
3098 forcedeltabothparents=None,
3089 sidedata_helpers=None,
3099 sidedata_helpers=None,
3090 ):
3100 ):
3091 """Copy this revlog to another, possibly with format changes.
3101 """Copy this revlog to another, possibly with format changes.
3092
3102
3093 The destination revlog will contain the same revisions and nodes.
3103 The destination revlog will contain the same revisions and nodes.
3094 However, it may not be bit-for-bit identical due to e.g. delta encoding
3104 However, it may not be bit-for-bit identical due to e.g. delta encoding
3095 differences.
3105 differences.
3096
3106
3097 The ``deltareuse`` argument controls how deltas from the existing revlog
3107 The ``deltareuse`` argument controls how deltas from the existing revlog
3098 are preserved in the destination revlog. The argument can have the
3108 are preserved in the destination revlog. The argument can have the
3099 following values:
3109 following values:
3100
3110
3101 DELTAREUSEALWAYS
3111 DELTAREUSEALWAYS
3102 Deltas will always be reused (if possible), even if the destination
3112 Deltas will always be reused (if possible), even if the destination
3103 revlog would not select the same revisions for the delta. This is the
3113 revlog would not select the same revisions for the delta. This is the
3104 fastest mode of operation.
3114 fastest mode of operation.
3105 DELTAREUSESAMEREVS
3115 DELTAREUSESAMEREVS
3106 Deltas will be reused if the destination revlog would pick the same
3116 Deltas will be reused if the destination revlog would pick the same
3107 revisions for the delta. This mode strikes a balance between speed
3117 revisions for the delta. This mode strikes a balance between speed
3108 and optimization.
3118 and optimization.
3109 DELTAREUSENEVER
3119 DELTAREUSENEVER
3110 Deltas will never be reused. This is the slowest mode of execution.
3120 Deltas will never be reused. This is the slowest mode of execution.
3111 This mode can be used to recompute deltas (e.g. if the diff/delta
3121 This mode can be used to recompute deltas (e.g. if the diff/delta
3112 algorithm changes).
3122 algorithm changes).
3113 DELTAREUSEFULLADD
3123 DELTAREUSEFULLADD
3114 Revisions will be re-added as if they were new content. This is
3124 Revisions will be re-added as if they were new content. This is
3115 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3125 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3116 e.g. large file detection and handling.
3126 e.g. large file detection and handling.
3117
3127
3118 Delta computation can be slow, so the choice of delta reuse policy can
3128 Delta computation can be slow, so the choice of delta reuse policy can
3119 significantly affect run time.
3129 significantly affect run time.
3120
3130
3121 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3131 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3122 two extremes. Deltas will be reused if they are appropriate. But if the
3132 two extremes. Deltas will be reused if they are appropriate. But if the
3123 delta could choose a better revision, it will do so. This means if you
3133 delta could choose a better revision, it will do so. This means if you
3124 are converting a non-generaldelta revlog to a generaldelta revlog,
3134 are converting a non-generaldelta revlog to a generaldelta revlog,
3125 deltas will be recomputed if the delta's parent isn't a parent of the
3135 deltas will be recomputed if the delta's parent isn't a parent of the
3126 revision.
3136 revision.
3127
3137
3128 In addition to the delta policy, the ``forcedeltabothparents``
3138 In addition to the delta policy, the ``forcedeltabothparents``
3129 argument controls whether to force compute deltas against both parents
3139 argument controls whether to force compute deltas against both parents
3130 for merges. If unset, the destination revlog's current setting is kept.
3140 for merges. If unset, the destination revlog's current setting is kept.
3131
3141
3132 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3142 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3133 `sidedata_helpers`.
3143 `sidedata_helpers`.
3134 """
3144 """
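# A usage sketch, assuming hypothetical ``src`` (source revlog), ``dest``
# (empty destination revlog) and ``tr`` (open transaction):
#
#     src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)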
3135 if deltareuse not in self.DELTAREUSEALL:
3145 if deltareuse not in self.DELTAREUSEALL:
3136 raise ValueError(
3146 raise ValueError(
3137 _(b'value for deltareuse invalid: %s') % deltareuse
3147 _(b'value for deltareuse invalid: %s') % deltareuse
3138 )
3148 )
3139
3149
3140 if len(destrevlog):
3150 if len(destrevlog):
3141 raise ValueError(_(b'destination revlog is not empty'))
3151 raise ValueError(_(b'destination revlog is not empty'))
3142
3152
3143 if getattr(self, 'filteredrevs', None):
3153 if getattr(self, 'filteredrevs', None):
3144 raise ValueError(_(b'source revlog has filtered revisions'))
3154 raise ValueError(_(b'source revlog has filtered revisions'))
3145 if getattr(destrevlog, 'filteredrevs', None):
3155 if getattr(destrevlog, 'filteredrevs', None):
3146 raise ValueError(_(b'destination revlog has filtered revisions'))
3156 raise ValueError(_(b'destination revlog has filtered revisions'))
3147
3157
3148 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3158 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3149 # if possible.
3159 # if possible.
3150 oldlazydelta = destrevlog._lazydelta
3160 oldlazydelta = destrevlog._lazydelta
3151 oldlazydeltabase = destrevlog._lazydeltabase
3161 oldlazydeltabase = destrevlog._lazydeltabase
3152 oldamd = destrevlog._deltabothparents
3162 oldamd = destrevlog._deltabothparents
3153
3163
3154 try:
3164 try:
3155 if deltareuse == self.DELTAREUSEALWAYS:
3165 if deltareuse == self.DELTAREUSEALWAYS:
3156 destrevlog._lazydeltabase = True
3166 destrevlog._lazydeltabase = True
3157 destrevlog._lazydelta = True
3167 destrevlog._lazydelta = True
3158 elif deltareuse == self.DELTAREUSESAMEREVS:
3168 elif deltareuse == self.DELTAREUSESAMEREVS:
3159 destrevlog._lazydeltabase = False
3169 destrevlog._lazydeltabase = False
3160 destrevlog._lazydelta = True
3170 destrevlog._lazydelta = True
3161 elif deltareuse == self.DELTAREUSENEVER:
3171 elif deltareuse == self.DELTAREUSENEVER:
3162 destrevlog._lazydeltabase = False
3172 destrevlog._lazydeltabase = False
3163 destrevlog._lazydelta = False
3173 destrevlog._lazydelta = False
3164
3174
3165 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3175 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3166
3176
3167 self._clone(
3177 self._clone(
3168 tr,
3178 tr,
3169 destrevlog,
3179 destrevlog,
3170 addrevisioncb,
3180 addrevisioncb,
3171 deltareuse,
3181 deltareuse,
3172 forcedeltabothparents,
3182 forcedeltabothparents,
3173 sidedata_helpers,
3183 sidedata_helpers,
3174 )
3184 )
3175
3185
3176 finally:
3186 finally:
3177 destrevlog._lazydelta = oldlazydelta
3187 destrevlog._lazydelta = oldlazydelta
3178 destrevlog._lazydeltabase = oldlazydeltabase
3188 destrevlog._lazydeltabase = oldlazydeltabase
3179 destrevlog._deltabothparents = oldamd
3189 destrevlog._deltabothparents = oldamd
3180
3190
3181 def _clone(
3191 def _clone(
3182 self,
3192 self,
3183 tr,
3193 tr,
3184 destrevlog,
3194 destrevlog,
3185 addrevisioncb,
3195 addrevisioncb,
3186 deltareuse,
3196 deltareuse,
3187 forcedeltabothparents,
3197 forcedeltabothparents,
3188 sidedata_helpers,
3198 sidedata_helpers,
3189 ):
3199 ):
3190 """perform the core duty of `revlog.clone` after parameter processing"""
3200 """perform the core duty of `revlog.clone` after parameter processing"""
3191 write_debug = None
3201 write_debug = None
3192 if self._debug_delta:
3202 if self._debug_delta:
3193 write_debug = tr._report
3203 write_debug = tr._report
3194 deltacomputer = deltautil.deltacomputer(
3204 deltacomputer = deltautil.deltacomputer(
3195 destrevlog,
3205 destrevlog,
3196 write_debug=write_debug,
3206 write_debug=write_debug,
3197 )
3207 )
3198 index = self.index
3208 index = self.index
3199 for rev in self:
3209 for rev in self:
3200 entry = index[rev]
3210 entry = index[rev]
3201
3211
3202 # Some classes override linkrev to take filtered revs into
3212 # Some classes override linkrev to take filtered revs into
3203 # account. Use raw entry from index.
3213 # account. Use raw entry from index.
3204 flags = entry[0] & 0xFFFF
3214 flags = entry[0] & 0xFFFF
3205 linkrev = entry[4]
3215 linkrev = entry[4]
3206 p1 = index[entry[5]][7]
3216 p1 = index[entry[5]][7]
3207 p2 = index[entry[6]][7]
3217 p2 = index[entry[6]][7]
3208 node = entry[7]
3218 node = entry[7]
3209
3219
3210 # (Possibly) reuse the delta from the revlog if allowed and
3220 # (Possibly) reuse the delta from the revlog if allowed and
3211 # the revlog chunk is a delta.
3221 # the revlog chunk is a delta.
3212 cachedelta = None
3222 cachedelta = None
3213 rawtext = None
3223 rawtext = None
3214 if deltareuse == self.DELTAREUSEFULLADD:
3224 if deltareuse == self.DELTAREUSEFULLADD:
3215 text = self._revisiondata(rev)
3225 text = self._revisiondata(rev)
3216 sidedata = self.sidedata(rev)
3226 sidedata = self.sidedata(rev)
3217
3227
3218 if sidedata_helpers is not None:
3228 if sidedata_helpers is not None:
3219 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3229 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3220 self, sidedata_helpers, sidedata, rev
3230 self, sidedata_helpers, sidedata, rev
3221 )
3231 )
3222 flags = flags | new_flags[0] & ~new_flags[1]
3232 flags = flags | new_flags[0] & ~new_flags[1]
3223
3233
3224 destrevlog.addrevision(
3234 destrevlog.addrevision(
3225 text,
3235 text,
3226 tr,
3236 tr,
3227 linkrev,
3237 linkrev,
3228 p1,
3238 p1,
3229 p2,
3239 p2,
3230 cachedelta=cachedelta,
3240 cachedelta=cachedelta,
3231 node=node,
3241 node=node,
3232 flags=flags,
3242 flags=flags,
3233 deltacomputer=deltacomputer,
3243 deltacomputer=deltacomputer,
3234 sidedata=sidedata,
3244 sidedata=sidedata,
3235 )
3245 )
3236 else:
3246 else:
3237 if destrevlog._lazydelta:
3247 if destrevlog._lazydelta:
3238 dp = self.deltaparent(rev)
3248 dp = self.deltaparent(rev)
3239 if dp != nullrev:
3249 if dp != nullrev:
3240 cachedelta = (dp, bytes(self._chunk(rev)))
3250 cachedelta = (dp, bytes(self._chunk(rev)))
3241
3251
3242 sidedata = None
3252 sidedata = None
3243 if not cachedelta:
3253 if not cachedelta:
3244 rawtext = self._revisiondata(rev)
3254 rawtext = self._revisiondata(rev)
3245 sidedata = self.sidedata(rev)
3255 sidedata = self.sidedata(rev)
3246 if sidedata is None:
3256 if sidedata is None:
3247 sidedata = self.sidedata(rev)
3257 sidedata = self.sidedata(rev)
3248
3258
3249 if sidedata_helpers is not None:
3259 if sidedata_helpers is not None:
3250 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3260 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3251 self, sidedata_helpers, sidedata, rev
3261 self, sidedata_helpers, sidedata, rev
3252 )
3262 )
3253 flags = flags | new_flags[0] & ~new_flags[1]
3263 flags = flags | new_flags[0] & ~new_flags[1]
3254
3264
3255 with destrevlog._writing(tr):
3265 with destrevlog._writing(tr):
3256 destrevlog._addrevision(
3266 destrevlog._addrevision(
3257 node,
3267 node,
3258 rawtext,
3268 rawtext,
3259 tr,
3269 tr,
3260 linkrev,
3270 linkrev,
3261 p1,
3271 p1,
3262 p2,
3272 p2,
3263 flags,
3273 flags,
3264 cachedelta,
3274 cachedelta,
3265 deltacomputer=deltacomputer,
3275 deltacomputer=deltacomputer,
3266 sidedata=sidedata,
3276 sidedata=sidedata,
3267 )
3277 )
3268
3278
3269 if addrevisioncb:
3279 if addrevisioncb:
3270 addrevisioncb(self, rev, node)
3280 addrevisioncb(self, rev, node)
3271
3281
3272 def censorrevision(self, tr, censornode, tombstone=b''):
3282 def censorrevision(self, tr, censornode, tombstone=b''):
3273 if self._format_version == REVLOGV0:
3283 if self._format_version == REVLOGV0:
3274 raise error.RevlogError(
3284 raise error.RevlogError(
3275 _(b'cannot censor with version %d revlogs')
3285 _(b'cannot censor with version %d revlogs')
3276 % self._format_version
3286 % self._format_version
3277 )
3287 )
3278 elif self._format_version == REVLOGV1:
3288 elif self._format_version == REVLOGV1:
3279 rewrite.v1_censor(self, tr, censornode, tombstone)
3289 rewrite.v1_censor(self, tr, censornode, tombstone)
3280 else:
3290 else:
3281 rewrite.v2_censor(self, tr, censornode, tombstone)
3291 rewrite.v2_censor(self, tr, censornode, tombstone)
3282
3292
3283 def verifyintegrity(self, state):
3293 def verifyintegrity(self, state):
3284 """Verifies the integrity of the revlog.
3294 """Verifies the integrity of the revlog.
3285
3295
3286 Yields ``revlogproblem`` instances describing problems that are
3296 Yields ``revlogproblem`` instances describing problems that are
3287 found.
3297 found.
3288 """
3298 """
3289 dd, di = self.checksize()
3299 dd, di = self.checksize()
3290 if dd:
3300 if dd:
3291 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3301 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3292 if di:
3302 if di:
3293 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3303 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3294
3304
3295 version = self._format_version
3305 version = self._format_version
3296
3306
3297 # The verifier tells us what version revlog we should be.
3307 # The verifier tells us what version revlog we should be.
3298 if version != state[b'expectedversion']:
3308 if version != state[b'expectedversion']:
3299 yield revlogproblem(
3309 yield revlogproblem(
3300 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3310 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3301 % (self.display_id, version, state[b'expectedversion'])
3311 % (self.display_id, version, state[b'expectedversion'])
3302 )
3312 )
3303
3313
3304 state[b'skipread'] = set()
3314 state[b'skipread'] = set()
3305 state[b'safe_renamed'] = set()
3315 state[b'safe_renamed'] = set()
3306
3316
3307 for rev in self:
3317 for rev in self:
3308 node = self.node(rev)
3318 node = self.node(rev)
3309
3319
3310 # Verify contents. 4 cases to care about:
3320 # Verify contents. 4 cases to care about:
3311 #
3321 #
3312 # common: the most common case
3322 # common: the most common case
3313 # rename: with a rename
3323 # rename: with a rename
3314 # meta: file content starts with b'\1\n', the metadata
3324 # meta: file content starts with b'\1\n', the metadata
3315 # header defined in filelog.py, but without a rename
3325 # header defined in filelog.py, but without a rename
3316 # ext: content stored externally
3326 # ext: content stored externally
3317 #
3327 #
3318 # More formally, their differences are shown below:
3328 # More formally, their differences are shown below:
3319 #
3329 #
3320 # | common | rename | meta | ext
3330 # | common | rename | meta | ext
3321 # -------------------------------------------------------
3331 # -------------------------------------------------------
3322 # flags() | 0 | 0 | 0 | not 0
3332 # flags() | 0 | 0 | 0 | not 0
3323 # renamed() | False | True | False | ?
3333 # renamed() | False | True | False | ?
3324 # rawtext[0:2]=='\1\n'| False | True | True | ?
3334 # rawtext[0:2]=='\1\n'| False | True | True | ?
3325 #
3335 #
3326 # "rawtext" means the raw text stored in revlog data, which
3336 # "rawtext" means the raw text stored in revlog data, which
3327 # could be retrieved by "rawdata(rev)". "text"
3337 # could be retrieved by "rawdata(rev)". "text"
3328 # mentioned below is "revision(rev)".
3338 # mentioned below is "revision(rev)".
3329 #
3339 #
3330 # There are 3 different lengths stored physically:
3340 # There are 3 different lengths stored physically:
3331 # 1. L1: rawsize, stored in revlog index
3341 # 1. L1: rawsize, stored in revlog index
3332 # 2. L2: len(rawtext), stored in revlog data
3342 # 2. L2: len(rawtext), stored in revlog data
3333 # 3. L3: len(text), stored in revlog data if flags==0, or
3343 # 3. L3: len(text), stored in revlog data if flags==0, or
3334 # possibly somewhere else if flags!=0
3344 # possibly somewhere else if flags!=0
3335 #
3345 #
3336 # L1 should be equal to L2. L3 could be different from them.
3346 # L1 should be equal to L2. L3 could be different from them.
3337 # "text" may or may not affect commit hash depending on flag
3347 # "text" may or may not affect commit hash depending on flag
3338 # processors (see flagutil.addflagprocessor).
3348 # processors (see flagutil.addflagprocessor).
3339 #
3349 #
3340 # | common | rename | meta | ext
3350 # | common | rename | meta | ext
3341 # -------------------------------------------------
3351 # -------------------------------------------------
3342 # rawsize() | L1 | L1 | L1 | L1
3352 # rawsize() | L1 | L1 | L1 | L1
3343 # size() | L1 | L2-LM | L1(*) | L1 (?)
3353 # size() | L1 | L2-LM | L1(*) | L1 (?)
3344 # len(rawtext) | L2 | L2 | L2 | L2
3354 # len(rawtext) | L2 | L2 | L2 | L2
3345 # len(text) | L2 | L2 | L2 | L3
3355 # len(text) | L2 | L2 | L2 | L3
3346 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3356 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3347 #
3357 #
3348 # LM: length of metadata, depending on rawtext
3358 # LM: length of metadata, depending on rawtext
3349 # (*): not ideal, see comment in filelog.size
3359 # (*): not ideal, see comment in filelog.size
3350 # (?): could be "- len(meta)" if the resolved content has
3360 # (?): could be "- len(meta)" if the resolved content has
3351 # rename metadata
3361 # rename metadata
3352 #
3362 #
3353 # Checks needed to be done:
3363 # Checks needed to be done:
3354 # 1. length check: L1 == L2, in all cases.
3364 # 1. length check: L1 == L2, in all cases.
3355 # 2. hash check: depending on flag processor, we may need to
3365 # 2. hash check: depending on flag processor, we may need to
3356 # use either "text" (external), or "rawtext" (in revlog).
3366 # use either "text" (external), or "rawtext" (in revlog).
3357
3367
3358 try:
3368 try:
3359 skipflags = state.get(b'skipflags', 0)
3369 skipflags = state.get(b'skipflags', 0)
3360 if skipflags:
3370 if skipflags:
3361 skipflags &= self.flags(rev)
3371 skipflags &= self.flags(rev)
3362
3372
3363 _verify_revision(self, skipflags, state, node)
3373 _verify_revision(self, skipflags, state, node)
3364
3374
3365 l1 = self.rawsize(rev)
3375 l1 = self.rawsize(rev)
3366 l2 = len(self.rawdata(node))
3376 l2 = len(self.rawdata(node))
3367
3377
3368 if l1 != l2:
3378 if l1 != l2:
3369 yield revlogproblem(
3379 yield revlogproblem(
3370 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3380 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3371 node=node,
3381 node=node,
3372 )
3382 )
3373
3383
3374 except error.CensoredNodeError:
3384 except error.CensoredNodeError:
3375 if state[b'erroroncensored']:
3385 if state[b'erroroncensored']:
3376 yield revlogproblem(
3386 yield revlogproblem(
3377 error=_(b'censored file data'), node=node
3387 error=_(b'censored file data'), node=node
3378 )
3388 )
3379 state[b'skipread'].add(node)
3389 state[b'skipread'].add(node)
3380 except Exception as e:
3390 except Exception as e:
3381 yield revlogproblem(
3391 yield revlogproblem(
3382 error=_(b'unpacking %s: %s')
3392 error=_(b'unpacking %s: %s')
3383 % (short(node), stringutil.forcebytestr(e)),
3393 % (short(node), stringutil.forcebytestr(e)),
3384 node=node,
3394 node=node,
3385 )
3395 )
3386 state[b'skipread'].add(node)
3396 state[b'skipread'].add(node)
3387
3397
3388 def storageinfo(
3398 def storageinfo(
3389 self,
3399 self,
3390 exclusivefiles=False,
3400 exclusivefiles=False,
3391 sharedfiles=False,
3401 sharedfiles=False,
3392 revisionscount=False,
3402 revisionscount=False,
3393 trackedsize=False,
3403 trackedsize=False,
3394 storedsize=False,
3404 storedsize=False,
3395 ):
3405 ):
3396 d = {}
3406 d = {}
3397
3407
3398 if exclusivefiles:
3408 if exclusivefiles:
3399 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3409 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3400 if not self._inline:
3410 if not self._inline:
3401 d[b'exclusivefiles'].append((self.opener, self._datafile))
3411 d[b'exclusivefiles'].append((self.opener, self._datafile))
3402
3412
3403 if sharedfiles:
3413 if sharedfiles:
3404 d[b'sharedfiles'] = []
3414 d[b'sharedfiles'] = []
3405
3415
3406 if revisionscount:
3416 if revisionscount:
3407 d[b'revisionscount'] = len(self)
3417 d[b'revisionscount'] = len(self)
3408
3418
3409 if trackedsize:
3419 if trackedsize:
3410 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3420 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3411
3421
3412 if storedsize:
3422 if storedsize:
3413 d[b'storedsize'] = sum(
3423 d[b'storedsize'] = sum(
3414 self.opener.stat(path).st_size for path in self.files()
3424 self.opener.stat(path).st_size for path in self.files()
3415 )
3425 )
3416
3426
3417 return d
3427 return d
3418
3428
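# A usage sketch, assuming a hypothetical revlog ``rl``; only the requested
# pieces appear as keys in the returned dict:
#
#     info = rl.storageinfo(revisionscount=True, trackedsize=True)
#     info[b'revisionscount']  # == len(rl)
#     info[b'trackedsize']     # sum of rawsize over all revisions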
3419 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3429 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3420 if not self.hassidedata:
3430 if not self.hassidedata:
3421 return
3431 return
3422 # revlog formats with sidedata support do not support inline
3432 # revlog formats with sidedata support do not support inline
3423 assert not self._inline
3433 assert not self._inline
3424 if not helpers[1] and not helpers[2]:
3434 if not helpers[1] and not helpers[2]:
3425 # Nothing to generate or remove
3435 # Nothing to generate or remove
3426 return
3436 return
3427
3437
3428 new_entries = []
3438 new_entries = []
3429 # append the new sidedata
3439 # append the new sidedata
3430 with self._writing(transaction):
3440 with self._writing(transaction):
3431 ifh, dfh, sdfh = self._writinghandles
3441 ifh, dfh, sdfh = self._writinghandles
3432 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3442 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3433
3443
3434 current_offset = sdfh.tell()
3444 current_offset = sdfh.tell()
3435 for rev in range(startrev, endrev + 1):
3445 for rev in range(startrev, endrev + 1):
3436 entry = self.index[rev]
3446 entry = self.index[rev]
3437 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3447 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3438 store=self,
3448 store=self,
3439 sidedata_helpers=helpers,
3449 sidedata_helpers=helpers,
3440 sidedata={},
3450 sidedata={},
3441 rev=rev,
3451 rev=rev,
3442 )
3452 )
3443
3453
3444 serialized_sidedata = sidedatautil.serialize_sidedata(
3454 serialized_sidedata = sidedatautil.serialize_sidedata(
3445 new_sidedata
3455 new_sidedata
3446 )
3456 )
3447
3457
3448 sidedata_compression_mode = COMP_MODE_INLINE
3458 sidedata_compression_mode = COMP_MODE_INLINE
3449 if serialized_sidedata and self.hassidedata:
3459 if serialized_sidedata and self.hassidedata:
3450 sidedata_compression_mode = COMP_MODE_PLAIN
3460 sidedata_compression_mode = COMP_MODE_PLAIN
3451 h, comp_sidedata = self.compress(serialized_sidedata)
3461 h, comp_sidedata = self.compress(serialized_sidedata)
3452 if (
3462 if (
3453 h != b'u'
3463 h != b'u'
3454 and comp_sidedata[0] != b'\0'
3464 and comp_sidedata[0] != b'\0'
3455 and len(comp_sidedata) < len(serialized_sidedata)
3465 and len(comp_sidedata) < len(serialized_sidedata)
3456 ):
3466 ):
3457 assert not h
3467 assert not h
3458 if (
3468 if (
3459 comp_sidedata[0]
3469 comp_sidedata[0]
3460 == self._docket.default_compression_header
3470 == self._docket.default_compression_header
3461 ):
3471 ):
3462 sidedata_compression_mode = COMP_MODE_DEFAULT
3472 sidedata_compression_mode = COMP_MODE_DEFAULT
3463 serialized_sidedata = comp_sidedata
3473 serialized_sidedata = comp_sidedata
3464 else:
3474 else:
3465 sidedata_compression_mode = COMP_MODE_INLINE
3475 sidedata_compression_mode = COMP_MODE_INLINE
3466 serialized_sidedata = comp_sidedata
3476 serialized_sidedata = comp_sidedata
3467 if entry[8] != 0 or entry[9] != 0:
3477 if entry[8] != 0 or entry[9] != 0:
3468 # rewriting entries that already have sidedata is not
3478 # rewriting entries that already have sidedata is not
3469 # supported yet, because it introduces garbage data in the
3479 # supported yet, because it introduces garbage data in the
3470 # revlog.
3480 # revlog.
3471 msg = b"rewriting existing sidedata is not supported yet"
3481 msg = b"rewriting existing sidedata is not supported yet"
3472 raise error.Abort(msg)
3482 raise error.Abort(msg)
3473
3483
3474 # Apply (potential) flags to add and to remove after running
3484 # Apply (potential) flags to add and to remove after running
3475 # the sidedata helpers
3485 # the sidedata helpers
3476 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3486 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3477 entry_update = (
3487 entry_update = (
3478 current_offset,
3488 current_offset,
3479 len(serialized_sidedata),
3489 len(serialized_sidedata),
3480 new_offset_flags,
3490 new_offset_flags,
3481 sidedata_compression_mode,
3491 sidedata_compression_mode,
3482 )
3492 )
3483
3493
3484 # the sidedata computation might have moved the file cursors around
3494 # the sidedata computation might have moved the file cursors around
3485 sdfh.seek(current_offset, os.SEEK_SET)
3495 sdfh.seek(current_offset, os.SEEK_SET)
3486 sdfh.write(serialized_sidedata)
3496 sdfh.write(serialized_sidedata)
3487 new_entries.append(entry_update)
3497 new_entries.append(entry_update)
3488 current_offset += len(serialized_sidedata)
3498 current_offset += len(serialized_sidedata)
3489 self._docket.sidedata_end = sdfh.tell()
3499 self._docket.sidedata_end = sdfh.tell()
3490
3500
3491 # rewrite the new index entries
3501 # rewrite the new index entries
3492 ifh.seek(startrev * self.index.entry_size)
3502 ifh.seek(startrev * self.index.entry_size)
3493 for i, e in enumerate(new_entries):
3503 for i, e in enumerate(new_entries):
3494 rev = startrev + i
3504 rev = startrev + i
3495 self.index.replace_sidedata_info(rev, *e)
3505 self.index.replace_sidedata_info(rev, *e)
3496 packed = self.index.entry_binary(rev)
3506 packed = self.index.entry_binary(rev)
3497 if rev == 0 and self._docket is None:
3507 if rev == 0 and self._docket is None:
3498 header = self._format_flags | self._format_version
3508 header = self._format_flags | self._format_version
3499 header = self.index.pack_header(header)
3509 header = self.index.pack_header(header)
3500 packed = header + packed
3510 packed = header + packed
3501 ifh.write(packed)
3511 ifh.write(packed)
@@ -1,1200 +1,1230 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator, List
13 from typing import Generator, List
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from .revlogutils.constants import (
20 INDEX_HEADER,
21 )
19 from . import (
22 from . import (
20 changelog,
23 changelog,
21 error,
24 error,
22 filelog,
25 filelog,
23 manifest,
26 manifest,
24 policy,
27 policy,
25 pycompat,
28 pycompat,
29 revlog as revlogmod,
26 util,
30 util,
27 vfs as vfsmod,
31 vfs as vfsmod,
28 )
32 )
29 from .utils import hashutil
33 from .utils import hashutil
30
34
31 parsers = policy.importmod('parsers')
35 parsers = policy.importmod('parsers')
32 # how many bytes should be read from fncache in one read
36 # how many bytes should be read from fncache in one read
33 # This is done to prevent loading large fncache files into memory
37 # This is done to prevent loading large fncache files into memory
34 fncache_chunksize = 10 ** 6
38 fncache_chunksize = 10 ** 6
35
39
36
40
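# An illustrative sketch of reading in chunks of this size, assuming a
# hypothetical open file object ``fp`` (not the actual fncache code):
#
#     while True:
#         chunk = fp.read(fncache_chunksize)
#         if not chunk:
#             break
#         process(chunk)  # hypothetical consumer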
37 def _match_tracked_entry(entry, matcher):
41 def _match_tracked_entry(entry, matcher):
38 """Parses a fncache entry and returns whether the entry tracks a path
42 """Parses a fncache entry and returns whether the entry tracks a path
39 matched by matcher.
43 matched by matcher.
40
44
41 If matcher is None, returns True"""
45 If matcher is None, returns True"""
42
46
43 if matcher is None:
47 if matcher is None:
44 return True
48 return True
45 if entry.is_filelog:
49 if entry.is_filelog:
46 return matcher(entry.target_id)
50 return matcher(entry.target_id)
47 elif entry.is_manifestlog:
51 elif entry.is_manifestlog:
48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
52 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
53 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50
54
51
55
52 # This avoids a collision between a file named foo and a dir named
56 # This avoids a collision between a file named foo and a dir named
53 # foo.i or foo.d
57 # foo.i or foo.d
54 def _encodedir(path):
58 def _encodedir(path):
55 """
59 """
56 >>> _encodedir(b'data/foo.i')
60 >>> _encodedir(b'data/foo.i')
57 'data/foo.i'
61 'data/foo.i'
58 >>> _encodedir(b'data/foo.i/bla.i')
62 >>> _encodedir(b'data/foo.i/bla.i')
59 'data/foo.i.hg/bla.i'
63 'data/foo.i.hg/bla.i'
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
64 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 'data/foo.i.hg.hg/bla.i'
65 'data/foo.i.hg.hg/bla.i'
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
66 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
67 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 """
68 """
65 return (
69 return (
66 path.replace(b".hg/", b".hg.hg/")
70 path.replace(b".hg/", b".hg.hg/")
67 .replace(b".i/", b".i.hg/")
71 .replace(b".i/", b".i.hg/")
68 .replace(b".d/", b".d.hg/")
72 .replace(b".d/", b".d.hg/")
69 )
73 )
70
74
71
75
72 encodedir = getattr(parsers, 'encodedir', _encodedir)
76 encodedir = getattr(parsers, 'encodedir', _encodedir)
73
77
74
78
75 def decodedir(path):
79 def decodedir(path):
76 """
80 """
77 >>> decodedir(b'data/foo.i')
81 >>> decodedir(b'data/foo.i')
78 'data/foo.i'
82 'data/foo.i'
79 >>> decodedir(b'data/foo.i.hg/bla.i')
83 >>> decodedir(b'data/foo.i.hg/bla.i')
80 'data/foo.i/bla.i'
84 'data/foo.i/bla.i'
81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
85 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 'data/foo.i.hg/bla.i'
86 'data/foo.i.hg/bla.i'
83 """
87 """
84 if b".hg/" not in path:
88 if b".hg/" not in path:
85 return path
89 return path
86 return (
90 return (
87 path.replace(b".d.hg/", b".d/")
91 path.replace(b".d.hg/", b".d/")
88 .replace(b".i.hg/", b".i/")
92 .replace(b".i.hg/", b".i/")
89 .replace(b".hg.hg/", b".hg/")
93 .replace(b".hg.hg/", b".hg/")
90 )
94 )
91
95
92
96
93 def _reserved():
97 def _reserved():
94 """characters that are problematic for filesystems
98 """characters that are problematic for filesystems
95
99
96 * ascii escapes (0..31)
100 * ascii escapes (0..31)
97 * ascii hi (126..255)
101 * ascii hi (126..255)
98 * windows specials
102 * windows specials
99
103
100 these characters will be escaped by the encode functions
104 these characters will be escaped by the encode functions
101 """
105 """
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
106 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 for x in range(32):
107 for x in range(32):
104 yield x
108 yield x
105 for x in range(126, 256):
109 for x in range(126, 256):
106 yield x
110 yield x
107 for x in winreserved:
111 for x in winreserved:
108 yield x
112 yield x


def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )


_encodefname, _decodefname = _buildencodefun()
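
# Illustrative round trip (assumes this module is importable; the helper
# name is hypothetical): the pair built above is a bijection, so decoding
# inverts encoding for any encodable byte string.
#
#     def _demo_roundtrip():
#         enc, dec = _buildencodefun()
#         for sample in (b'HELLO', b'hello:world?', b'_underscore'):
#             assert dec(enc(sample)) == sample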


def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    return _encodefname(encodedir(s))


def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    return decodedir(_decodefname(s))


def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode


lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)


def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path


_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
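
# With the values above, _maxshortdirslen works out to 8 * (8 + 1) - 4 = 68
# bytes of shortened directory names, '/' separators included.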


def _hashencode(path, dotencode):
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
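
# Illustrative shape of a hashed path (hypothetical input, digest shortened):
# an over-long 'data/...' path becomes roughly
#
#     dh/<up to 8 chars per dir>/.../<filler><40-hex sha1 of full path><ext>
#
# with the directory section capped at _maxshortdirslen (68) characters and
# the whole result normally at most _maxstorepathlen (120) characters.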


def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res


def _pathencode(path):
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res


_pathencode = getattr(parsers, 'pathencode', _pathencode)


def _plainhybridencode(f):
    return _hybridencode(f, False)


def _calcmode(vfs):
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode
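
# Example: with umask 0o022 a fresh '.hg' is created with mode 0o755, and
# since (0o777 & ~0o022) == 0o755, _calcmode returns None and callers skip
# redundant chmod calls.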


_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

REVLOG_FILES_EXT = (
    b'.i',
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extensions that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" files are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exceptions to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')


def is_revlog(f, kind, st):
    if kind != stat.S_IFREG:
        return False
    if f.endswith(REVLOG_FILES_EXT):
        return True
    return False


def is_revlog_file(f):
    if f.endswith(REVLOG_FILES_EXT):
        return True
    return False


# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# files that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
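
# Example of how the bits compose: FILETYPE_CHANGELOG_MAIN is
# (1 << 13) | (1 << 1) == 0x2002, so a single flags value can be tested
# independently for "belongs to the changelog" and "is the main revlog file"
# with `flags & FILEFLAGS_CHANGELOG` and `flags & FILEFLAGS_REVLOG_MAIN`.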


@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    unencoded_path = attr.ib()
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        if self._file_size is None:
            if vfs is None:
                msg = b"calling vfs-less file_size without prior call: %s"
                msg %= self.unencoded_path
                raise error.ProgrammingError(msg)
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                self._file_size = 0
        return self._file_size

    def get_stream(self, vfs, copies):
        """return data "stream" information for this file

        (unencoded_file_path, content_iterator, content_size)
        """
        size = self.file_size(None)

        def get_stream():
            actual_path = copies[vfs.join(self.unencoded_path)]
            with open(actual_path, 'rb') as fp:
                yield None  # ready to stream
                if size <= 65536:
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

        s = get_stream()
        next(s)
        return (self.unencoded_path, s, size)
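
# The nested generator above is deliberately "primed": calling next(s) runs
# it up to its first `yield None`, so the file is opened (and the copy path
# resolved) immediately, while the actual content is produced lazily. A
# minimal self-contained sketch of the pattern (hypothetical helper):
#
#     def _primed_reader(path):
#         def gen():
#             with open(path, 'rb') as fp:
#                 yield None  # file is open from this point on
#                 yield fp.read()
#         g = gen()
#         next(g)  # open the file now; the caller streams later
#         return g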


@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represents some data in the store."""

    def files(self) -> List[StoreFile]:
        raise NotImplementedError

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
    ):
        """return a list of data streams associated with the files of this
        entry

        return [(unencoded_file_path, content_iterator, content_size), …]
        """
        assert vfs is not None
        return [f.get_stream(vfs, copies) for f in self.files()]


@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    _entry_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = [
                StoreFile(
                    unencoded_path=self._entry_path,
                    file_size=self._file_size,
                    is_volatile=self._is_volatile,
                )
            ]
        return self._files


@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    revlog_type = attr.ib(default=None)
    target_id = attr.ib(default=None)
    _path_prefix = attr.ib(default=None)
    _details = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        assert b'.i' in details, (path_prefix, details)
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        return self.revlog_type & FILEFLAGS_CHANGELOG

    @property
    def is_manifestlog(self):
        return self.revlog_type & FILEFLAGS_MANIFESTLOG

    @property
    def is_filelog(self):
        return self.revlog_type & FILEFLAGS_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = []
            for ext in sorted(self._details, key=_ext_key):
                path = self._path_prefix + ext
                file_size = self._details[ext]
                # files that are "volatile" and might change between
                # listing and streaming
                #
                # note: the ".nd" files are nodemap data and won't "change"
                # but they might be deleted.
                volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
                f = StoreFile(path, file_size, volatile)
                self._files.append(f)
        return self._files

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
    ):
-        if repo is None or max_changeset is None:
-            return super().get_streams(
-                repo=repo,
-                vfs=vfs,
-                copies=copies,
-                max_changeset=max_changeset,
-            )
-        if any(k.endswith(b'.idx') for k in self._details.keys()):
+        if (
+            repo is None
+            or max_changeset is None
             # This use revlog-v2, ignore for now
+            or any(k.endswith(b'.idx') for k in self._details.keys())
+            # This is not inline, no race expected
+            or b'.d' in self._details
+        ):
             return super().get_streams(
                 repo=repo,
                 vfs=vfs,
                 copies=copies,
                 max_changeset=max_changeset,
             )
-        name_to_ext = {}
-        for ext in self._details.keys():
-            name_to_ext[self._path_prefix + ext] = ext
+
         name_to_size = {}
         for f in self.files():
             name_to_size[f.unencoded_path] = f.file_size(None)
+
         stream = [
             f.get_stream(vfs, copies)
             for f in self.files()
-            if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
+            if not f.unencoded_path.endswith(b'.i')
         ]

-        is_inline = b'.d' not in self._details
-
-        rl = self.get_revlog_instance(repo).get_revlog()
-        rl_stream = rl.get_streams(max_changeset, force_inline=is_inline)
-
-        for name, s, size in rl_stream:
-            if name_to_size.get(name, 0) != size:
-                msg = _(b"expected %d bytes but %d provided for %s")
-                msg %= name_to_size.get(name, 0), size, name
-                raise error.Abort(msg)
-        stream.extend(rl_stream)
+        index_path = self._path_prefix + b'.i'
+
+        index_file = None
+        try:
+            index_file = vfs(index_path)
+            header = index_file.read(INDEX_HEADER.size)
+            if revlogmod.revlog.is_inline_index(header):
+                size = name_to_size[index_path]
+
+                # no split underneath, just return the stream
+                def get_stream():
+                    fp = index_file
+                    try:
+                        fp.seek(0)
+                        yield None
+                        if size <= 65536:
+                            yield fp.read(size)
+                        else:
+                            yield from util.filechunkiter(fp, limit=size)
+                    finally:
+                        fp.close()
+
+                s = get_stream()
+                next(s)
+                index_file = None
+                stream.append((index_path, s, size))
+            else:
+                rl = self.get_revlog_instance(repo).get_revlog()
+                rl_stream = rl.get_streams(max_changeset, force_inline=True)
+                for name, s, size in rl_stream:
+                    if name_to_size.get(name, 0) != size:
+                        msg = _(b"expected %d bytes but %d provided for %s")
+                        msg %= name_to_size.get(name, 0), size, name
+                        raise error.Abort(msg)
+                stream.extend(rl_stream)
+        finally:
+            if index_file is not None:
+                index_file.close()
+
        files = self.files()
        assert len(stream) == len(files), (
            stream,
            files,
            self._path_prefix,
            self.target_id,
        )
        return stream
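
    # The hunk above is the point of this commit: between listing the store
    # and streaming it, an inline revlog may be split into separate ".i" and
    # ".d" files. Instead of always instantiating a revlog to obtain a
    # consistent view, the new code opens only the index file, inspects its
    # header, and falls back to the heavyweight revlog path solely when a
    # split actually happened. A hedged, self-contained sketch of the idea
    # (names hypothetical, header check abstracted as a callback):
    #
    #     def _stream_if_still_inline(path, header_says_inline, header_size=4):
    #         fp = open(path, 'rb')
    #         try:
    #             header = fp.read(header_size)
    #         except Exception:
    #             fp.close()
    #             raise
    #         if header_says_inline(header):
    #             fp.seek(0)
    #             return fp  # cheap path: stream this very file object
    #         fp.close()
    #         return None  # caller must rebuild a consistent view instead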

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            mandir = self.target_id
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=mandir
            )
        else:
            return filelog.filelog(repo.svfs, self.target_id)


def _gather_revlog(files_data):
    """group files per revlog prefix

    Returns a sorted list of (prefix, mapping) pairs: the prefix is the
    revlog path without extension, and the mapping associates each file
    "suffix" seen for this revlog with arbitrary file data.
    """
    revlogs = collections.defaultdict(dict)
    for u, value in files_data:
        name, ext = _split_revlog_ext(u)
        revlogs[name][ext] = value
    return sorted(revlogs.items())
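
# Example (hypothetical sizes): the files of one revlog collapse into a
# single grouped entry,
#
#     _gather_revlog([(b'data/foo.i', 64), (b'data/foo.d', 256)])
#     == [(b'data/foo', {b'.i': 64, b'.d': 256})]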


def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    if filename.endswith(REVLOG_FILES_LONG_EXT):
        char = b'-'
    else:
        char = b'.'
    idx = filename.rfind(char)
    return filename[:idx], filename[idx:]
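
# Example: plain extensions split on the last '.', while the long
# `-SOMELONGIDHASH.ext` forms split on the '-' introducing the hash:
#
#     _split_revlog_ext(b'data/foo.i') == (b'data/foo', b'.i')
#     _split_revlog_ext(b'data/foo-1a2b3c.nd') == (b'data/foo', b'-1a2b3c.nd')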


def _ext_key(ext):
    """a key to order revlog suffixes

    important to issue .i after the other entries."""
    # the only important part of this order is to keep the `.i` last.
    if ext.endswith(b'.n'):
        return (0, ext)
    elif ext.endswith(b'.nd'):
        return (10, ext)
    elif ext.endswith(b'.d'):
        return (20, ext)
    elif ext.endswith(b'.i'):
        return (50, ext)
    else:
        return (40, ext)
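
# Example ordering produced by this key, keeping the index last so that it
# is only streamed once all the data it points to has been sent:
#
#     sorted([b'.i', b'.d', b'.nd', b'.n'], key=_ext_key)
#     == [b'.n', b'.nd', b'.d', b'.i']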


class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''returns a sorted list of (unencoded path, size) pairs'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    if is_revlog(f, kind, st):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlog names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG, False),
            (b'meta', FILEFLAGS_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            for revlog, details in _gather_revlog(files):
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, s in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = s
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = s
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=False,
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instances of BaseStoreEntry subclasses

        if a matcher is passed, only storage files for tracked paths that
        match it are yielded
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x
868 def copylist(self):
898 def copylist(self):
869 return _data
899 return _data
870
900
871 def write(self, tr):
901 def write(self, tr):
872 pass
902 pass
873
903
874 def invalidatecaches(self):
904 def invalidatecaches(self):
875 pass
905 pass
876
906
877 def markremoved(self, fn):
907 def markremoved(self, fn):
878 pass
908 pass
879
909
880 def __contains__(self, path):
910 def __contains__(self, path):
881 '''Checks if the store contains path'''
911 '''Checks if the store contains path'''
882 path = b"/".join((b"data", path))
912 path = b"/".join((b"data", path))
883 # file?
913 # file?
884 if self.vfs.exists(path + b".i"):
914 if self.vfs.exists(path + b".i"):
885 return True
915 return True
886 # dir?
916 # dir?
887 if not path.endswith(b"/"):
917 if not path.endswith(b"/"):
888 path = path + b"/"
918 path = path + b"/"
889 return self.vfs.exists(path)
919 return self.vfs.exists(path)
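
    # Example: `b'foo/bar' in store` first looks for the file revlog
    # 'data/foo/bar.i' (exact match), then for the directory
    # 'data/foo/bar/' (prefix match).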


class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]


class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            msg = _(b"fncache does not end with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()
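
    # The loop above reads the fncache in fixed-size chunks but only
    # consumes complete newline-terminated entries, carrying the tail over
    # to the next read. A minimal self-contained sketch of the technique
    # (hypothetical helper):
    #
    #     def _iter_lines_chunked(fp, chunksize=4096):
    #         tail = b''
    #         for chunk in iter(lambda: fp.read(chunksize), b''):
    #             tail += chunk
    #             done, sep, tail = tail.rpartition(b'\n')
    #             if sep:
    #                 yield from (done + sep).splitlines()
    #         # any leftover `tail` means the file lacked a trailing
    #         # newline, the error case handled above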

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()
1025 def addignore(self, fn):
1055 def addignore(self, fn):
1026 self._ignores.add(fn)
1056 self._ignores.add(fn)
1027
1057
1028 def add(self, fn):
1058 def add(self, fn):
1029 if fn in self._ignores:
1059 if fn in self._ignores:
1030 return
1060 return
1031 if self.entries is None:
1061 if self.entries is None:
1032 self._load()
1062 self._load()
1033 if fn not in self.entries:
1063 if fn not in self.entries:
1034 self.addls.add(fn)
1064 self.addls.add(fn)
1035
1065
1036 def remove(self, fn):
1066 def remove(self, fn):
1037 if self.entries is None:
1067 if self.entries is None:
1038 self._load()
1068 self._load()
1039 if fn in self.addls:
1069 if fn in self.addls:
1040 self.addls.remove(fn)
1070 self.addls.remove(fn)
1041 return
1071 return
1042 try:
1072 try:
1043 self.entries.remove(fn)
1073 self.entries.remove(fn)
1044 self._dirty = True
1074 self._dirty = True
1045 except KeyError:
1075 except KeyError:
1046 pass
1076 pass
1047
1077
1048 def __contains__(self, fn):
1078 def __contains__(self, fn):
1049 if fn in self.addls:
1079 if fn in self.addls:
1050 return True
1080 return True
1051 if self.entries is None:
1081 if self.entries is None:
1052 self._load()
1082 self._load()
1053 return fn in self.entries
1083 return fn in self.entries
1054
1084
1055 def __iter__(self):
1085 def __iter__(self):
1056 if self.entries is None:
1086 if self.entries is None:
1057 self._load()
1087 self._load()
1058 return iter(self.entries | self.addls)
1088 return iter(self.entries | self.addls)


class _fncachevfs(vfsmod.proxyvfs):
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and is_revlog_file(path)
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to let the fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)


class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # Note: all files in the fncache should be revlog related. However,
        # the fncache might contain files added by previous versions of
        # Mercurial.
        files = ((f, None) for f in self.fncache if is_revlog_file(f))
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False