stream-clone: smoothly detect and handle a case where a revlog is split...
marmoute
r51534:54604240 default
@@ -1,3478 +1,3501 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
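
The (read, write, raw) triple above is the general shape of a revlog flag processor: read and write return the transformed text plus a boolean saying whether the result can be hash-validated, while raw reports whether the stored rawtext is verifiable as-is. Below is a minimal sketch of a custom processor, assuming a hypothetical REVIDX_EXAMPLE flag bit (the transform is illustrative, not Mercurial API); it would be registered through flagutil.insertflagprocessor(), exactly as _init_opts() does for the b'flagprocessors' option further down.

def examplereadprocessor(rl, text):
    # storage form -> user-visible form; False: skip hash validation
    return text.replace(b'\r\n', b'\n'), False


def examplewriteprocessor(rl, text):
    # user-visible form -> storage form
    return text.replace(b'\n', b'\r\n'), False


def examplerawprocessor(rl, text):
    # the stored rawtext is not directly hash-checkable
    return False


exampleprocessor = (
    examplereadprocessor,
    examplewriteprocessor,
    examplerawprocessor,
)

# hypothetical registration, mirroring _init_opts():
# flagutil.insertflagprocessor(REVIDX_EXAMPLE, exampleprocessor, mapping)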


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """
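
The concurrencychecker hook described in the class docstring above can be made concrete with a small sketch. This is a minimal illustration of the documented contract (file handle, filename, expected position); the always-raise policy and the message are assumptions, not Mercurial code.

def checkposition(fh, filename, expected):
    # sketch of a `concurrencychecker` callback: fail (by raising) if
    # the handle is not where the revlog expects it to be; a real
    # checker might log or warn instead
    actual = fh.tell()
    if actual != expected:
        raise error.RevlogError(
            b'%s: expected offset %d, found %d'
            % (filename, expected, actual)
        )

# it would then be passed as, e.g.:
# revlog(opener, target, radix, concurrencychecker=checkposition)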
    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance-measurement code might
        not set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
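
The chunk cache size validation in _init_opts() above relies on the standard power-of-two bit trick: for n > 0, n & (n - 1) clears the lowest set bit, so the expression is zero exactly when n has a single bit set. A quick self-contained illustration (plain Python, not Mercurial code):

for n in (1, 2, 65536, 3, 65535):
    # n & (n - 1) == 0 detects powers of two (for positive n)
    print(n, n & (n - 1) == 0)
# prints: 1 True, 2 True, 65536 True, 3 False, 65535 False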
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''
-    def get_streams(self, max_linkrev):
+    def get_streams(self, max_linkrev, force_inline=False):
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations,
            # since it only traverses commits created during the current
            # streaming pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too; having a way to
        # pre-open the files we feed to the revlog and never closing them
        # before we are done streaming would do it.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
+        elif force_inline:
+
+            def get_stream():
+                with self._datafp() as fp_d:
+                    yield None
+
+                    for rev in range(n):
+                        idx = self.index.entry_binary(rev)
+                        if rev == 0 and self._docket is None:
+                            # re-inject the inline flag
+                            header = self._format_flags
+                            header |= self._format_version
+                            header |= FLAG_INLINE_DATA
+                            header = self.index.pack_header(header)
+                            idx = header + idx
+                        yield idx
+                        yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]
+
+            inline_stream = get_stream()
+            next(inline_stream)
+            return [
+                (self._indexfile, inline_stream, index_size + data_size),
+            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]
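
Both branches return a list of (filename, stream, size) triples whose generators are already primed: the next() call consumed the initial None each generator yields once its file is open, so the handles are held before control returns to the caller. A rough sketch of a consumer follows (hypothetical driver code, not the actual streamclone implementation); passing force_inline=True is what lets a stream clone keep working when the revlog was split to a non-inline layout after streaming started.

def emit_revlog_streams(rl, max_linkrev, out):
    # hypothetical consumer of get_streams(); `out` is anything with a
    # write() method
    for name, stream, size in rl.get_streams(max_linkrev, force_inline=True):
        written = 0
        for chunk in stream:
            out.write(chunk)
            written += len(chunk)
        # the advertised size must match what was actually streamed
        assert written == size, (name, written, size)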
577 def _loadindex(self, docket=None):
600 def _loadindex(self, docket=None):
578
601
579 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
602 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
580
603
581 if self.postfix is not None:
604 if self.postfix is not None:
582 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
605 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
583 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
606 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
584 entry_point = b'%s.i.a' % self.radix
607 entry_point = b'%s.i.a' % self.radix
585 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
608 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
586 entry_point = b'%s.i.s' % self.radix
609 entry_point = b'%s.i.s' % self.radix
587 else:
610 else:
588 entry_point = b'%s.i' % self.radix
611 entry_point = b'%s.i' % self.radix
589
612
590 if docket is not None:
613 if docket is not None:
591 self._docket = docket
614 self._docket = docket
592 self._docket_file = entry_point
615 self._docket_file = entry_point
593 else:
616 else:
594 self._initempty = True
617 self._initempty = True
595 entry_data = self._get_data(entry_point, mmapindexthreshold)
618 entry_data = self._get_data(entry_point, mmapindexthreshold)
596 if len(entry_data) > 0:
619 if len(entry_data) > 0:
597 header = INDEX_HEADER.unpack(entry_data[:4])[0]
620 header = INDEX_HEADER.unpack(entry_data[:4])[0]
598 self._initempty = False
621 self._initempty = False
599 else:
622 else:
600 header = new_header
623 header = new_header
601
624
602 self._format_flags = header & ~0xFFFF
625 self._format_flags = header & ~0xFFFF
603 self._format_version = header & 0xFFFF
626 self._format_version = header & 0xFFFF
604
627
605 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
628 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
606 if supported_flags is None:
629 if supported_flags is None:
607 msg = _(b'unknown version (%d) in revlog %s')
630 msg = _(b'unknown version (%d) in revlog %s')
608 msg %= (self._format_version, self.display_id)
631 msg %= (self._format_version, self.display_id)
609 raise error.RevlogError(msg)
632 raise error.RevlogError(msg)
610 elif self._format_flags & ~supported_flags:
633 elif self._format_flags & ~supported_flags:
611 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
634 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
612 display_flag = self._format_flags >> 16
635 display_flag = self._format_flags >> 16
613 msg %= (display_flag, self._format_version, self.display_id)
636 msg %= (display_flag, self._format_version, self.display_id)
614 raise error.RevlogError(msg)
637 raise error.RevlogError(msg)
615
638
616 features = FEATURES_BY_VERSION[self._format_version]
639 features = FEATURES_BY_VERSION[self._format_version]
617 self._inline = features[b'inline'](self._format_flags)
640 self._inline = features[b'inline'](self._format_flags)
618 self._generaldelta = features[b'generaldelta'](self._format_flags)
641 self._generaldelta = features[b'generaldelta'](self._format_flags)
619 self.hassidedata = features[b'sidedata']
642 self.hassidedata = features[b'sidedata']
620
643
621 if not features[b'docket']:
644 if not features[b'docket']:
622 self._indexfile = entry_point
645 self._indexfile = entry_point
623 index_data = entry_data
646 index_data = entry_data
624 else:
647 else:
625 self._docket_file = entry_point
648 self._docket_file = entry_point
626 if self._initempty:
649 if self._initempty:
627 self._docket = docketutil.default_docket(self, header)
650 self._docket = docketutil.default_docket(self, header)
628 else:
651 else:
629 self._docket = docketutil.parse_docket(
652 self._docket = docketutil.parse_docket(
630 self, entry_data, use_pending=self._trypending
653 self, entry_data, use_pending=self._trypending
631 )
654 )
632
655
633 if self._docket is not None:
656 if self._docket is not None:
634 self._indexfile = self._docket.index_filepath()
657 self._indexfile = self._docket.index_filepath()
635 index_data = b''
658 index_data = b''
636 index_size = self._docket.index_end
659 index_size = self._docket.index_end
637 if index_size > 0:
660 if index_size > 0:
638 index_data = self._get_data(
661 index_data = self._get_data(
639 self._indexfile, mmapindexthreshold, size=index_size
662 self._indexfile, mmapindexthreshold, size=index_size
640 )
663 )
641 if len(index_data) < index_size:
664 if len(index_data) < index_size:
642 msg = _(b'too few index data for %s: got %d, expected %d')
665 msg = _(b'too few index data for %s: got %d, expected %d')
643 msg %= (self.display_id, len(index_data), index_size)
666 msg %= (self.display_id, len(index_data), index_size)
644 raise error.RevlogError(msg)
667 raise error.RevlogError(msg)
645
668
646 self._inline = False
669 self._inline = False
647 # generaldelta implied by version 2 revlogs.
670 # generaldelta implied by version 2 revlogs.
648 self._generaldelta = True
671 self._generaldelta = True
649 # the logic for persistent nodemap will be dealt with within the
672 # the logic for persistent nodemap will be dealt with within the
650 # main docket, so disable it for now.
673 # main docket, so disable it for now.
651 self._nodemap_file = None
674 self._nodemap_file = None
652
675
653 if self._docket is not None:
676 if self._docket is not None:
654 self._datafile = self._docket.data_filepath()
677 self._datafile = self._docket.data_filepath()
655 self._sidedatafile = self._docket.sidedata_filepath()
678 self._sidedatafile = self._docket.sidedata_filepath()
656 elif self.postfix is None:
679 elif self.postfix is None:
657 self._datafile = b'%s.d' % self.radix
680 self._datafile = b'%s.d' % self.radix
658 else:
681 else:
659 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
682 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
660
683
661 self.nodeconstants = sha1nodeconstants
684 self.nodeconstants = sha1nodeconstants
662 self.nullid = self.nodeconstants.nullid
685 self.nullid = self.nodeconstants.nullid
663
686
664 # sparse-revlog can't be on without general-delta (issue6056)
687 # sparse-revlog can't be on without general-delta (issue6056)
665 if not self._generaldelta:
688 if not self._generaldelta:
666 self._sparserevlog = False
689 self._sparserevlog = False
667
690
668 self._storedeltachains = True
691 self._storedeltachains = True
669
692
670 devel_nodemap = (
693 devel_nodemap = (
671 self._nodemap_file
694 self._nodemap_file
672 and force_nodemap
695 and force_nodemap
673 and parse_index_v1_nodemap is not None
696 and parse_index_v1_nodemap is not None
674 )
697 )
675
698
676 use_rust_index = False
699 use_rust_index = False
677 if rustrevlog is not None:
700 if rustrevlog is not None:
678 if self._nodemap_file is not None:
701 if self._nodemap_file is not None:
679 use_rust_index = True
702 use_rust_index = True
680 else:
703 else:
681 use_rust_index = self.opener.options.get(b'rust.index')
704 use_rust_index = self.opener.options.get(b'rust.index')
682
705
683 self._parse_index = parse_index_v1
706 self._parse_index = parse_index_v1
684 if self._format_version == REVLOGV0:
707 if self._format_version == REVLOGV0:
685 self._parse_index = revlogv0.parse_index_v0
708 self._parse_index = revlogv0.parse_index_v0
686 elif self._format_version == REVLOGV2:
709 elif self._format_version == REVLOGV2:
687 self._parse_index = parse_index_v2
710 self._parse_index = parse_index_v2
688 elif self._format_version == CHANGELOGV2:
711 elif self._format_version == CHANGELOGV2:
689 self._parse_index = parse_index_cl_v2
712 self._parse_index = parse_index_cl_v2
690 elif devel_nodemap:
713 elif devel_nodemap:
691 self._parse_index = parse_index_v1_nodemap
714 self._parse_index = parse_index_v1_nodemap
692 elif use_rust_index:
715 elif use_rust_index:
693 self._parse_index = parse_index_v1_mixed
716 self._parse_index = parse_index_v1_mixed
694 try:
717 try:
695 d = self._parse_index(index_data, self._inline)
718 d = self._parse_index(index_data, self._inline)
696 index, chunkcache = d
719 index, chunkcache = d
697 use_nodemap = (
720 use_nodemap = (
698 not self._inline
721 not self._inline
699 and self._nodemap_file is not None
722 and self._nodemap_file is not None
700 and util.safehasattr(index, 'update_nodemap_data')
723 and util.safehasattr(index, 'update_nodemap_data')
701 )
724 )
702 if use_nodemap:
725 if use_nodemap:
703 nodemap_data = nodemaputil.persisted_data(self)
726 nodemap_data = nodemaputil.persisted_data(self)
704 if nodemap_data is not None:
727 if nodemap_data is not None:
705 docket = nodemap_data[0]
728 docket = nodemap_data[0]
706 if (
729 if (
707 len(d[0]) > docket.tip_rev
730 len(d[0]) > docket.tip_rev
708 and d[0][docket.tip_rev][7] == docket.tip_node
731 and d[0][docket.tip_rev][7] == docket.tip_node
709 ):
732 ):
710 # no changelog tampering
733 # no changelog tampering
711 self._nodemap_docket = docket
734 self._nodemap_docket = docket
712 index.update_nodemap_data(*nodemap_data)
735 index.update_nodemap_data(*nodemap_data)
713 except (ValueError, IndexError):
736 except (ValueError, IndexError):
714 raise error.RevlogError(
737 raise error.RevlogError(
715 _(b"index %s is corrupted") % self.display_id
738 _(b"index %s is corrupted") % self.display_id
716 )
739 )
717 self.index = index
740 self.index = index
718 self._segmentfile = randomaccessfile.randomaccessfile(
741 self._segmentfile = randomaccessfile.randomaccessfile(
719 self.opener,
742 self.opener,
720 (self._indexfile if self._inline else self._datafile),
743 (self._indexfile if self._inline else self._datafile),
721 self._chunkcachesize,
744 self._chunkcachesize,
722 chunkcache,
745 chunkcache,
723 )
746 )
724 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
747 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
725 self.opener,
748 self.opener,
726 self._sidedatafile,
749 self._sidedatafile,
727 self._chunkcachesize,
750 self._chunkcachesize,
728 )
751 )
729 # revnum -> (chain-length, sum-delta-length)
752 # revnum -> (chain-length, sum-delta-length)
730 self._chaininfocache = util.lrucachedict(500)
753 self._chaininfocache = util.lrucachedict(500)
731 # revlog header -> revlog compressor
754 # revlog header -> revlog compressor
732 self._decompressors = {}
755 self._decompressors = {}
733
756
734 def get_revlog(self):
757 def get_revlog(self):
735 """simple function to mirror API of other not-really-revlog API"""
758 """simple function to mirror API of other not-really-revlog API"""
736 return self
759 return self
737
760
738 @util.propertycache
761 @util.propertycache
739 def revlog_kind(self):
762 def revlog_kind(self):
740 return self.target[0]
763 return self.target[0]
741
764
742 @util.propertycache
765 @util.propertycache
743 def display_id(self):
766 def display_id(self):
744 """The public facing "ID" of the revlog that we use in message"""
767 """The public facing "ID" of the revlog that we use in message"""
745 if self.revlog_kind == KIND_FILELOG:
768 if self.revlog_kind == KIND_FILELOG:
746 # Reference the file without the "data/" prefix, so it is familiar
769 # Reference the file without the "data/" prefix, so it is familiar
747 # to the user.
770 # to the user.
748 return self.target[1]
771 return self.target[1]
749 else:
772 else:
750 return self.radix
773 return self.radix
751
774
752 def _get_decompressor(self, t):
775 def _get_decompressor(self, t):
753 try:
776 try:
754 compressor = self._decompressors[t]
777 compressor = self._decompressors[t]
755 except KeyError:
778 except KeyError:
756 try:
779 try:
757 engine = util.compengines.forrevlogheader(t)
780 engine = util.compengines.forrevlogheader(t)
758 compressor = engine.revlogcompressor(self._compengineopts)
781 compressor = engine.revlogcompressor(self._compengineopts)
759 self._decompressors[t] = compressor
782 self._decompressors[t] = compressor
760 except KeyError:
783 except KeyError:
761 raise error.RevlogError(
784 raise error.RevlogError(
762 _(b'unknown compression type %s') % binascii.hexlify(t)
785 _(b'unknown compression type %s') % binascii.hexlify(t)
763 )
786 )
764 return compressor
787 return compressor
765
788
766 @util.propertycache
789 @util.propertycache
767 def _compressor(self):
790 def _compressor(self):
768 engine = util.compengines[self._compengine]
791 engine = util.compengines[self._compengine]
769 return engine.revlogcompressor(self._compengineopts)
792 return engine.revlogcompressor(self._compengineopts)
770
793
771 @util.propertycache
794 @util.propertycache
772 def _decompressor(self):
795 def _decompressor(self):
773 """the default decompressor"""
796 """the default decompressor"""
774 if self._docket is None:
797 if self._docket is None:
775 return None
798 return None
776 t = self._docket.default_compression_header
799 t = self._docket.default_compression_header
777 c = self._get_decompressor(t)
800 c = self._get_decompressor(t)
778 return c.decompress
801 return c.decompress
779
802
780 def _indexfp(self):
803 def _indexfp(self):
781 """file object for the revlog's index file"""
804 """file object for the revlog's index file"""
782 return self.opener(self._indexfile, mode=b"r")
805 return self.opener(self._indexfile, mode=b"r")
783
806
784 def __index_write_fp(self):
807 def __index_write_fp(self):
785 # You should not use this directly and use `_writing` instead
808 # You should not use this directly and use `_writing` instead
786 try:
809 try:
787 f = self.opener(
810 f = self.opener(
788 self._indexfile, mode=b"r+", checkambig=self._checkambig
811 self._indexfile, mode=b"r+", checkambig=self._checkambig
789 )
812 )
790 if self._docket is None:
813 if self._docket is None:
791 f.seek(0, os.SEEK_END)
814 f.seek(0, os.SEEK_END)
792 else:
815 else:
793 f.seek(self._docket.index_end, os.SEEK_SET)
816 f.seek(self._docket.index_end, os.SEEK_SET)
794 return f
817 return f
795 except FileNotFoundError:
818 except FileNotFoundError:
796 return self.opener(
819 return self.opener(
797 self._indexfile, mode=b"w+", checkambig=self._checkambig
820 self._indexfile, mode=b"w+", checkambig=self._checkambig
798 )
821 )
799
822
800 def __index_new_fp(self):
823 def __index_new_fp(self):
801 # You should not use this unless you are upgrading from inline revlog
824 # You should not use this unless you are upgrading from inline revlog
802 return self.opener(
825 return self.opener(
803 self._indexfile,
826 self._indexfile,
804 mode=b"w",
827 mode=b"w",
805 checkambig=self._checkambig,
828 checkambig=self._checkambig,
806 atomictemp=True,
829 atomictemp=True,
807 )
830 )
808
831
809 def _datafp(self, mode=b'r'):
832 def _datafp(self, mode=b'r'):
810 """file object for the revlog's data file"""
833 """file object for the revlog's data file"""
811 return self.opener(self._datafile, mode=mode)
834 return self.opener(self._datafile, mode=mode)
812
835
813 @contextlib.contextmanager
836 @contextlib.contextmanager
814 def _sidedatareadfp(self):
837 def _sidedatareadfp(self):
815 """file object suitable to read sidedata"""
838 """file object suitable to read sidedata"""
816 if self._writinghandles:
839 if self._writinghandles:
817 yield self._writinghandles[2]
840 yield self._writinghandles[2]
818 else:
841 else:
819 with self.opener(self._sidedatafile) as fp:
842 with self.opener(self._sidedatafile) as fp:
820 yield fp
843 yield fp
821
844
822 def tiprev(self):
845 def tiprev(self):
823 return len(self.index) - 1
846 return len(self.index) - 1
824
847
825 def tip(self):
848 def tip(self):
826 return self.node(self.tiprev())
849 return self.node(self.tiprev())
827
850
828 def __contains__(self, rev):
851 def __contains__(self, rev):
829 return 0 <= rev < len(self)
852 return 0 <= rev < len(self)
830
853
831 def __len__(self):
854 def __len__(self):
832 return len(self.index)
855 return len(self.index)
833
856
834 def __iter__(self):
857 def __iter__(self):
835 return iter(range(len(self)))
858 return iter(range(len(self)))
836
859
837 def revs(self, start=0, stop=None):
860 def revs(self, start=0, stop=None):
838 """iterate over all rev in this revlog (from start to stop)"""
861 """iterate over all rev in this revlog (from start to stop)"""
839 return storageutil.iterrevs(len(self), start=start, stop=stop)
862 return storageutil.iterrevs(len(self), start=start, stop=stop)
840
863
841 def hasnode(self, node):
864 def hasnode(self, node):
842 try:
865 try:
843 self.rev(node)
866 self.rev(node)
844 return True
867 return True
845 except KeyError:
868 except KeyError:
846 return False
869 return False
847
870
848 def candelta(self, baserev, rev):
871 def candelta(self, baserev, rev):
849 """whether two revisions (baserev, rev) can be delta-ed or not"""
872 """whether two revisions (baserev, rev) can be delta-ed or not"""
850 # Disable delta if either rev requires a content-changing flag
873 # Disable delta if either rev requires a content-changing flag
851 # processor (ex. LFS). This is because such flag processor can alter
874 # processor (ex. LFS). This is because such flag processor can alter
852 # the rawtext content that the delta will be based on, and two clients
875 # the rawtext content that the delta will be based on, and two clients
853 # could have a same revlog node with different flags (i.e. different
876 # could have a same revlog node with different flags (i.e. different
854 # rawtext contents) and the delta could be incompatible.
877 # rawtext contents) and the delta could be incompatible.
855 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
878 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
856 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
879 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
857 ):
880 ):
858 return False
881 return False
859 return True
882 return True
860
883
861 def update_caches(self, transaction):
884 def update_caches(self, transaction):
862 if self._nodemap_file is not None:
885 if self._nodemap_file is not None:
863 if transaction is None:
886 if transaction is None:
864 nodemaputil.update_persistent_nodemap(self)
887 nodemaputil.update_persistent_nodemap(self)
865 else:
888 else:
866 nodemaputil.setup_persistent_nodemap(transaction, self)
889 nodemaputil.setup_persistent_nodemap(transaction, self)
867
890
868 def clearcaches(self):
891 def clearcaches(self):
869 self._revisioncache = None
892 self._revisioncache = None
870 self._chainbasecache.clear()
893 self._chainbasecache.clear()
871 self._segmentfile.clear_cache()
894 self._segmentfile.clear_cache()
872 self._segmentfile_sidedata.clear_cache()
895 self._segmentfile_sidedata.clear_cache()
873 self._pcache = {}
896 self._pcache = {}
874 self._nodemap_docket = None
897 self._nodemap_docket = None
875 self.index.clearcaches()
898 self.index.clearcaches()
876 # The python code is the one responsible for validating the docket, we
899 # The python code is the one responsible for validating the docket, we
877 # end up having to refresh it here.
900 # end up having to refresh it here.
878 use_nodemap = (
901 use_nodemap = (
879 not self._inline
902 not self._inline
880 and self._nodemap_file is not None
903 and self._nodemap_file is not None
881 and util.safehasattr(self.index, 'update_nodemap_data')
904 and util.safehasattr(self.index, 'update_nodemap_data')
882 )
905 )
883 if use_nodemap:
906 if use_nodemap:
884 nodemap_data = nodemaputil.persisted_data(self)
907 nodemap_data = nodemaputil.persisted_data(self)
885 if nodemap_data is not None:
908 if nodemap_data is not None:
886 self._nodemap_docket = nodemap_data[0]
909 self._nodemap_docket = nodemap_data[0]
887 self.index.update_nodemap_data(*nodemap_data)
910 self.index.update_nodemap_data(*nodemap_data)
888
911
889 def rev(self, node):
912 def rev(self, node):
890 try:
913 try:
891 return self.index.rev(node)
914 return self.index.rev(node)
892 except TypeError:
915 except TypeError:
893 raise
916 raise
894 except error.RevlogError:
917 except error.RevlogError:
895 # parsers.c radix tree lookup failed
918 # parsers.c radix tree lookup failed
896 if (
919 if (
897 node == self.nodeconstants.wdirid
920 node == self.nodeconstants.wdirid
898 or node in self.nodeconstants.wdirfilenodeids
921 or node in self.nodeconstants.wdirfilenodeids
899 ):
922 ):
900 raise error.WdirUnsupported
923 raise error.WdirUnsupported
901 raise error.LookupError(node, self.display_id, _(b'no node'))
924 raise error.LookupError(node, self.display_id, _(b'no node'))
902
925
903 # Accessors for index entries.
926 # Accessors for index entries.
904
927
905 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
928 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
906 # are flags.
929 # are flags.
907 def start(self, rev):
930 def start(self, rev):
908 return int(self.index[rev][0] >> 16)
931 return int(self.index[rev][0] >> 16)
909
932
    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

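    # For example, with hypothetical sidedata fields (e[8] = offset,
    # e[9] = size):
    #
    #   rev 0: e[8] = 100, e[9] = 20
    #   rev 1: e[8] = 0,   e[9] = 0    # no sidedata, offset left at 0
    #
    # sidedata_cut_off(1) misses the fast path, scans backwards to rev 0 and
    # returns 100 + 20 = 120, the end of the last real sidedata chunk.
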
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

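    # For example, in a linear chain 0 <- 1 <- 2 the set ancestors(2),
    # including 2 itself, is {0, 1, 2}; a changelog-v2 revlog that persisted
    # the value would thus report fast_rank(2) == 3, while any other format
    # returns None.
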
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

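    # For example, with hypothetical delta bases index[5][3] == 3 and
    # index[3][3] == 3, chainbase(5) follows 5 -> 3 and stops once the base
    # points at itself, caching and returning 3.
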
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

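    # For example, when ``canonical_parent_order`` is set and an entry stores
    # its parents as (nullrev, 7), ``parentrevs`` returns them swapped as
    # (7, nullrev), so a null first parent never shadows a real second one.
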
    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

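    # For example, with general delta and hypothetical bases
    # index[6][3] == 4, index[4][3] == 1 and index[1][3] == 1:
    #
    #   _deltachain(6)            # -> ([1, 4, 6], False), walked to the base
    #   _deltachain(6, stoprev=4) # -> ([6], True), stopped before the base
    #
    # Without general delta the walk simply decrements: 6, 5, 4, ...
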
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

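    # For example, with rev 1 and rev 3 both children of rev 0, and rev 2 a
    # child of rev 1: every rev is first marked as a potential head, then
    # each entry clears its parents' marks (the spare last slot absorbs
    # nullrev == -1), leaving [2, 3].
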
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

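    # The ``a > b`` fast path above relies on revision numbers being
    # topologically sorted: an ancestor always has a smaller rev than its
    # descendants. So isancestorrev(5, 3) is False with no graph walk at
    # all, while isancestorrev(3, 5) still needs reachableroots.
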
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

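    # For example, a five-digit prefix id = b'1ab2c' cannot be fed to bin()
    # directly, so the slow path above converts only the even-length part
    # b'1ab2' (after checking that the dropped digit is hex) and then
    # re-checks the full prefix textually with hex(n).startswith(id).
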
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

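    # A minimal sketch of that check (assuming the usual SHA-1 revision
    # hashing, with the two parent nodes ordered before hashing):
    #
    #   import hashlib
    #   def hashrevision(text, p1, p2):
    #       a, b = sorted((p1, p2))
    #       return hashlib.sha1(a + b + text).digest()
    #
    # cmp() recomputes such a digest for ``text`` and reports a difference
    # whenever it does not match ``node``.
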
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

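    # For example, in an inline revlog the data chunks are interleaved with
    # the index entries on disk, so a read covering revs 2..3 must skip the
    # index records preceding each chunk:
    #
    #   start += (2 + 1) * entry_size  # 3 index entries sit before rev 2's data
    #   end   += (3 + 1) * entry_size  # 4 index entries sit before the end
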
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

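    # Illustrative sketch (assumes `rl` is an open revlog instance; the names
    # are hypothetical): rebuilding one revision reads the chunks of its whole
    # delta chain through a single batched segment read:
    #
    #   chain, _stopped = rl._deltachain(some_rev)
    #   bins = rl._chunks(chain)  # one decompressed delta/fulltext per entry
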
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

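    # Illustrative sketch (`rl` and `some_rev` are hypothetical): snapshots
    # are the full or intermediate texts that delta chains restart from, so:
    #
    #   rl.issnapshot(some_rev)     # True for full texts and intermediate
    #                               # snapshots, False for plain deltas
    #   rl.snapshotdepth(some_rev)  # 0 for a full text stored against nullrev
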
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

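    # Property sketch (illustrative only; `rl` is hypothetical): the delta
    # returned by revdiff() patches rev1's raw data into rev2's raw data:
    #
    #   delta = rl.revdiff(rev1, rev2)
    #   assert mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)
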
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

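    # Conceptual sketch mirroring the method above (illustrative only): a raw
    # text is the base snapshot of the delta chain with every later delta
    # applied in order:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
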
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

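    # Assumed behavior of storageutil.hashrevisionsha1 (a sketch, not a
    # definitive restatement): the default node id is a SHA-1 over the two
    # parent nodes, sorted, followed by the text, roughly:
    #
    #   import hashlib
    #   a, b = sorted([p1, p2])
    #   node = hashlib.sha1(a + b + text).digest()
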
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._indexfile + b'.s'
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

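    # Rough picture of the conversion above (illustrative): an inline revlog
    # interleaves each index entry with its revision data inside the `.i`
    # file; once the total size passes _maxinline, the data is copied out to
    # the `.d` file and a data-free index is rewritten, going through a
    # temporary `.i.s` file when `side_write` is in effect.
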
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                        transaction.add(
                            self._sidedatafile, self._docket.sidedata_end
                        )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

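    # Hypothetical usage sketch (`rl`, `tr` and the arguments are made up):
    # writes must happen inside the `_writing` context so that
    # `_writinghandles` is populated:
    #
    #   with rl._writing(tr):
    #       rl.addrevision(some_text, tr, some_linkrev, p1_node, p2_node)
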
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

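    # Hedged example (all names hypothetical): adding content inside an open
    # transaction returns the new revision number, or the existing one when
    # the node is already stored:
    #
    #   rev = rl.addrevision(b'file content', tr, linkrev, p1_node, p2_node)
    #   node = rl.node(rev)
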
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

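    # Header convention sketch (illustrative): compress() returns a
    # (header, data) pair where the header is empty when the compressed
    # payload carries its own marker (zlib output starts with b'x') and b'u'
    # flags data kept uncompressed, so decompress() can route on the first
    # byte:
    #
    #   h, packed = rl.compress(some_bytes)
    #   rl.decompress(h + packed)  # == some_bytes (possibly as a buffer)
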
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size before it is changed by flag processors;
            # this is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way
            # we can easily detect empty sidedata and it will be no different
            # from sidedata we add manually.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

2629 flags=flags,
2652 flags=flags,
2630 data_offset=offset,
2653 data_offset=offset,
2631 data_compressed_length=deltainfo.deltalen,
2654 data_compressed_length=deltainfo.deltalen,
2632 data_uncompressed_length=textlen,
2655 data_uncompressed_length=textlen,
2633 data_compression_mode=compression_mode,
2656 data_compression_mode=compression_mode,
2634 data_delta_base=deltainfo.base,
2657 data_delta_base=deltainfo.base,
2635 link_rev=link,
2658 link_rev=link,
2636 parent_rev_1=p1r,
2659 parent_rev_1=p1r,
2637 parent_rev_2=p2r,
2660 parent_rev_2=p2r,
2638 node_id=node,
2661 node_id=node,
2639 sidedata_offset=sidedata_offset,
2662 sidedata_offset=sidedata_offset,
2640 sidedata_compressed_length=len(serialized_sidedata),
2663 sidedata_compressed_length=len(serialized_sidedata),
2641 sidedata_compression_mode=sidedata_compression_mode,
2664 sidedata_compression_mode=sidedata_compression_mode,
2642 rank=rank,
2665 rank=rank,
2643 )
2666 )
2644
2667
2645 self.index.append(e)
2668 self.index.append(e)
2646 entry = self.index.entry_binary(curr)
2669 entry = self.index.entry_binary(curr)
2647 if curr == 0 and self._docket is None:
2670 if curr == 0 and self._docket is None:
2648 header = self._format_flags | self._format_version
2671 header = self._format_flags | self._format_version
2649 header = self.index.pack_header(header)
2672 header = self.index.pack_header(header)
2650 entry = header + entry
2673 entry = header + entry
2651 self._writeentry(
2674 self._writeentry(
2652 transaction,
2675 transaction,
2653 entry,
2676 entry,
2654 deltainfo.data,
2677 deltainfo.data,
2655 link,
2678 link,
2656 offset,
2679 offset,
2657 serialized_sidedata,
2680 serialized_sidedata,
2658 sidedata_offset,
2681 sidedata_offset,
2659 )
2682 )
2660
2683
2661 rawtext = btext[0]
2684 rawtext = btext[0]
2662
2685
2663 if alwayscache and rawtext is None:
2686 if alwayscache and rawtext is None:
2664 rawtext = deltacomputer.buildtext(revinfo, fh)
2687 rawtext = deltacomputer.buildtext(revinfo, fh)
2665
2688
2666 if type(rawtext) == bytes: # only accept immutable objects
2689 if type(rawtext) == bytes: # only accept immutable objects
2667 self._revisioncache = (node, curr, rawtext)
2690 self._revisioncache = (node, curr, rawtext)
2668 self._chainbasecache[curr] = deltainfo.chainbase
2691 self._chainbasecache[curr] = deltainfo.chainbase
2669 return curr
2692 return curr
2670
2693
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()
2747
2770
2748 nodemaputil.setup_persistent_nodemap(transaction, self)
2771 nodemaputil.setup_persistent_nodemap(transaction, self)
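
    # Illustrative sketch (editor's addition, not from the revlog source):
    # for an inline revlog, index entries and revision data are interleaved
    # in the ``.i`` file, so the physical truncation point registered with
    # ``transaction.add`` must account for the ``curr`` entries already
    # written:
    #
    #     physical_offset = data_offset + curr * self.index.entry_size
    #
    # which is exactly the ``offset += curr * self.index.entry_size``
    # adjustment in the inline branch above.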

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty
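
    # Illustrative sketch (editor's addition, not from the revlog source):
    # each item yielded by ``deltas`` is the 8-tuple unpacked above:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # where ``delta`` is a binary patch against ``deltabase``'s raw text and
    # ``linknode`` is resolved to a changelog revision through ``linkmapper``.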

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]
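
    # Illustrative example (editor's addition, not from the revlog source):
    # for an inline revlog the index and data share the ``.i`` file, so the
    # truncation point keeps both the surviving entries and their data. With
    # a hypothetical entry_size of 64, stripping at rev == 3 where
    # start(3) == 500 truncates the file to 500 + 3 * 64 == 692 bytes.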

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)
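
    # Illustrative usage (editor's sketch, not from the revlog source;
    # ``rl`` is a hypothetical open revlog): callers treat any nonzero
    # value as a problem:
    #
    #     dd, di = rl.checksize()
    #     if dd or di:
    #         ...  # report extra/missing bytes, as verifyintegrity() does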

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res
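
    # Editor's note (interpretation, not from the revlog source): when a
    # docket is in use, superseded index/data/sidedata files from earlier
    # generations are reported as well, so consumers copying the store pick
    # up every file that may still back this revlog.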

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )
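
    # Editor's note (interpretation, not from the revlog source): a revlog
    # that does not store delta chains has no stored delta to reuse, so
    # unless the consumer explicitly asked for deltas against the previously
    # emitted revision (CG_DELTAMODE_PREV), full snapshots are emitted.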

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. When ``None``, the destination revlog's current
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

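    # Illustrative usage (editor's sketch, not from the revlog source; names
    # are hypothetical): a format upgrade that recomputes every delta could
    # look like:
    #
    #     with repo.transaction(b'upgrade') as tr:
    #         src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
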
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline storage
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
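
    # Editor's note (interpretation, not from the revlog source): the checks
    # above leave each rewritten sidedata chunk in one of three states:
    # uncompressed (COMP_MODE_PLAIN), compressed with the docket's default
    # engine and no per-chunk header (COMP_MODE_DEFAULT), or carrying its
    # own compression header (COMP_MODE_INLINE).
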
@@ -1,1216 +1,1219 b''
# store.py - repository store handling for Mercurial
#
# Copyright 2008 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import functools
import os
import re
import stat
from typing import Generator, List

from .i18n import _
from .pycompat import getattr
from .thirdparty import attr
from .node import hex
from . import (
    changelog,
    error,
    filelog,
    manifest,
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)
from .utils import hashutil

parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# This is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6


def _match_tracked_entry(entry, matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True"""

    if matcher is None:
        return True
    if entry.is_filelog:
        return matcher(entry.target_id)
    elif entry.is_manifestlog:
        return matcher.visitdir(entry.target_id.rstrip(b'/'))
    raise error.ProgrammingError(b"cannot process entry %r" % entry)
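
# Editor's note (interpretation, not from the store.py source): filelog
# entries carry the tracked file path in ``target_id`` and are matched
# directly, while manifestlog entries carry a directory path (tree
# manifests are stored per directory), so ``matcher.visitdir`` is the
# relevant question for them.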


# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def _encodedir(path):
    """
    >>> _encodedir(b'data/foo.i')
    'data/foo.i'
    >>> _encodedir(b'data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    """
    return (
        path.replace(b".hg/", b".hg.hg/")
        .replace(b".i/", b".i.hg/")
        .replace(b".d/", b".d.hg/")
    )


encodedir = getattr(parsers, 'encodedir', _encodedir)


def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" not in path:
        return path
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )


def _reserved():
    """characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by encodefunctions
    """
    winreserved = [ord(x) for x in u'\\:*?"<>|']
    for x in range(32):
        yield x
    for x in range(126, 256):
        yield x
    for x in winreserved:
        yield x


def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )


_encodefname, _decodefname = _buildencodefun()
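
# Editor's note (interpretation, not from the store.py source): ``decode``
# can probe token lengths shortest-first because the encoding is
# self-delimiting: a plain byte is 1 byte, an escaped capital or underscore
# is 2 bytes (b'_x'), and a reserved byte is 3 bytes (b'~xx'); b'_' and
# b'~' themselves are always escaped, so they never appear as 1-byte keys
# in ``dmap``.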
170
170
171
171
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    return _encodefname(encodedir(s))


def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    return decodedir(_decodefname(s))


def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode


lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)


def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path


_maxstorepathlen = 120
_dirprefixlen = 8
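# maximum length of the shortened directory part of a hashed path:
# 8 * (_dirprefixlen + 1) - 4 == 68 characters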
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


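# _hashencode maps an over-long path to 'dh/' + truncated directory
# prefixes + as much of the basename as fits + the sha1 of the full path
# + the original extension, keeping the result within _maxstorepathlen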
def _hashencode(path, dotencode):
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res


def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    This is followed by the filler and the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    If the hashed encoding is used, the leading 'data/' is replaced with
    'dh/'.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res


def _pathencode(path):
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res


_pathencode = getattr(parsers, 'pathencode', _pathencode)


def _plainhybridencode(f):
    return _hybridencode(f, False)


def _calcmode(vfs):
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode


_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

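# a revlog's entry point is its `.i` index file; small revlogs keep their
# revision data inline in the `.i` file itself, larger ones store it in a
# separate `.d` file next to it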
REVLOG_FILES_MAIN_EXT = (b'.i',)
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extensions that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" files are nodemap data and won't "change" but they might
# be deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exceptions to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')


def is_revlog(f, kind, st):
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)


def revlog_type(f):
    # XXX we need to filter `undo.` files created by the transaction here,
    # however being naive about it also filters revlogs for `undo.*` files,
    # leading to issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    elif f.endswith(REVLOG_FILES_OTHER_EXT):
        t = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            t |= FILEFLAGS_VOLATILE
        return t
    return None


# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# files that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER


@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    unencoded_path = attr.ib()
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        if self._file_size is None:
            if vfs is None:
                msg = b"calling vfs-less file_size without prior call: %s"
                msg %= self.unencoded_path
                raise error.ProgrammingError(msg)
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
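                # the file may legitimately have disappeared between
                # listing and stat (e.g. a volatile file removed by
                # another process); report it as empty instead of failing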
                self._file_size = 0
        return self._file_size

    def get_stream(self, vfs, copies):
        """return data "stream" information for this file

        (unencoded_file_path, content_iterator, content_size)
        """
        size = self.file_size(None)

        def get_stream():
            actual_path = copies[vfs.join(self.unencoded_path)]
            with open(actual_path, 'rb') as fp:
                yield None  # ready to stream
                if size <= 65536:
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

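        # prime the generator so the file is opened (and any error is
        # raised) right away, before the stream is handed out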
        s = get_stream()
        next(s)
        return (self.unencoded_path, s, size)


@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represents some data in the
    store."""

    def files(self) -> List[StoreFile]:
        raise NotImplementedError

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
    ):
        """return a list of data streams associated with the files for
        this entry

        return [(unencoded_file_path, content_iterator, content_size), …]
        """
        assert vfs is not None
        return [f.get_stream(vfs, copies) for f in self.files()]


@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    _entry_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = [
                StoreFile(
                    unencoded_path=self._entry_path,
                    file_size=self._file_size,
                    is_volatile=self._is_volatile,
                )
            ]
        return self._files


@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    revlog_type = attr.ib(default=None)
    target_id = attr.ib(default=None)
    _path_prefix = attr.ib(default=None)
    _details = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        assert b'.i' in details, (path_prefix, details)
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        return self.revlog_type & FILEFLAGS_CHANGELOG

    @property
    def is_manifestlog(self):
        return self.revlog_type & FILEFLAGS_MANIFESTLOG

    @property
    def is_filelog(self):
        return self.revlog_type & FILEFLAGS_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = []
            for ext in sorted(self._details, key=_ext_key):
                path = self._path_prefix + ext
                data = self._details[ext]
                self._files.append(StoreFile(unencoded_path=path, **data))
        return self._files

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
    ):
        if repo is None or max_changeset is None:
            return super().get_streams(
                repo=repo,
                vfs=vfs,
                copies=copies,
                max_changeset=max_changeset,
            )
        if any(k.endswith(b'.idx') for k in self._details.keys()):
            # This uses revlog-v2; ignore it for now
            return super().get_streams(
                repo=repo,
                vfs=vfs,
                copies=copies,
                max_changeset=max_changeset,
            )
        name_to_ext = {}
        for ext in self._details.keys():
            name_to_ext[self._path_prefix + ext] = ext
        name_to_size = {}
        for f in self.files():
            name_to_size[f.unencoded_path] = f.file_size(None)
        stream = [
            f.get_stream(vfs, copies)
            for f in self.files()
            if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
        ]

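        # The revlog may have been "split" (its inline `.i` file turned
        # into a separate `.i` + `.d` pair) between the moment this entry
        # was listed and the moment we stream it. When the listing saw no
        # `.d` file, asking the revlog to stream itself as inline keeps
        # the file names and sizes consistent with what was advertised.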
        is_inline = b'.d' not in self._details

        rl = self.get_revlog_instance(repo).get_revlog()
        rl_stream = rl.get_streams(max_changeset, force_inline=is_inline)

        for name, s, size in rl_stream:
            if name_to_size.get(name, 0) != size:
                msg = _(b"expected %d bytes but %d provided for %s")
                msg %= name_to_size.get(name, 0), size, name
                raise error.Abort(msg)
        stream.extend(rl_stream)
        files = self.files()
        assert len(stream) == len(files), (
            stream,
            files,
            self._path_prefix,
            self.target_id,
        )
        return stream

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            mandir = self.target_id
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=mandir
            )
        else:
            return filelog.filelog(repo.svfs, self.target_id)


def _gather_revlog(files_data):
    """group files per revlog prefix

    This returns a two-level nested dict. The top level key is the revlog
    prefix without extension, the second level maps every file "suffix"
    seen for this revlog to arbitrary file data.
    """
    revlogs = collections.defaultdict(dict)
    for u, value in files_data:
        name, ext = _split_revlog_ext(u)
        revlogs[name][ext] = value
    return sorted(revlogs.items())


def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    if filename.endswith(REVLOG_FILES_LONG_EXT):
        char = b'-'
    else:
        char = b'.'
    idx = filename.rfind(char)
    return filename[:idx], filename[idx:]


def _ext_key(ext):
    """a key to order revlog suffixes

    it is important to issue `.i` after the other entries."""
    # the only important part of this order is to keep the `.i` last.
    if ext.endswith(b'.n'):
        return (0, ext)
    elif ext.endswith(b'.nd'):
        return (10, ext)
    elif ext.endswith(b'.d'):
        return (20, ext)
    elif ext.endswith(b'.i'):
        return (50, ext)
    else:
        return (40, ext)
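

# for instance, sorted((b'.i', b'.d', b'.n'), key=_ext_key) yields
# .n, then .d, then .i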


class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlog names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG, False),
            (b'meta', FILEFLAGS_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                file_details = {}
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                for ext, (t, s) in sorted(details.items()):
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=file_details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                file_details = {}
                for ext, (t, s) in details.items():
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=file_details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (i.e. revlogs)

        yields instances of BaseStoreEntry subclasses

        if a matcher is passed, only storage files of tracked paths
        matching it are yielded
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)


class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]


class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than
                # the chunksize, so let's keep iterating
                pass

        if chunk:
            msg = _(b"fncache does not end with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)


class _fncachevfs(vfsmod.proxyvfs):
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already
            # is known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size
                # zero, it should be considered as missing. Such zero-size
                # files are the result of truncation when a transaction is
                # aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to let fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)


class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related; however the
        # fncache might contain files added by previous versions of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            file_details = {}
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in details.items():
                file_details[ext] = {
                    'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                }
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=file_details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

1167 def copylist(self):
1170 def copylist(self):
1168 d = (
1171 d = (
1169 b'bookmarks',
1172 b'bookmarks',
1170 b'narrowspec',
1173 b'narrowspec',
1171 b'data',
1174 b'data',
1172 b'meta',
1175 b'meta',
1173 b'dh',
1176 b'dh',
1174 b'fncache',
1177 b'fncache',
1175 b'phaseroots',
1178 b'phaseroots',
1176 b'obsstore',
1179 b'obsstore',
1177 b'00manifest.d',
1180 b'00manifest.d',
1178 b'00manifest.i',
1181 b'00manifest.i',
1179 b'00changelog.d',
1182 b'00changelog.d',
1180 b'00changelog.i',
1183 b'00changelog.i',
1181 b'requires',
1184 b'requires',
1182 )
1185 )
1183 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1186 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1184
1187
1185 def write(self, tr):
1188 def write(self, tr):
1186 self.fncache.write(tr)
1189 self.fncache.write(tr)
1187
1190
1188 def invalidatecaches(self):
1191 def invalidatecaches(self):
1189 self.fncache.entries = None
1192 self.fncache.entries = None
1190 self.fncache.addls = set()
1193 self.fncache.addls = set()
1191
1194
1192 def markremoved(self, fn):
1195 def markremoved(self, fn):
1193 self.fncache.remove(fn)
1196 self.fncache.remove(fn)
1194
1197
1195 def _exists(self, f):
1198 def _exists(self, f):
1196 ef = self.encode(f)
1199 ef = self.encode(f)
1197 try:
1200 try:
1198 self.getsize(ef)
1201 self.getsize(ef)
1199 return True
1202 return True
1200 except FileNotFoundError:
1203 except FileNotFoundError:
1201 return False
1204 return False
1202
1205
1203 def __contains__(self, path):
1206 def __contains__(self, path):
1204 '''Checks if the store contains path'''
1207 '''Checks if the store contains path'''
1205 path = b"/".join((b"data", path))
1208 path = b"/".join((b"data", path))
1206 # check for files (exact match)
1209 # check for files (exact match)
1207 e = path + b'.i'
1210 e = path + b'.i'
1208 if e in self.fncache and self._exists(e):
1211 if e in self.fncache and self._exists(e):
1209 return True
1212 return True
1210 # now check for directories (prefix match)
1213 # now check for directories (prefix match)
1211 if not path.endswith(b'/'):
1214 if not path.endswith(b'/'):
1212 path += b'/'
1215 path += b'/'
1213 for e in self.fncache:
1216 for e in self.fncache:
1214 if e.startswith(path) and self._exists(e):
1217 if e.startswith(path) and self._exists(e):
1215 return True
1218 return True
1216 return False
1219 return False
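
As an aside on the containment check above: `__contains__` first probes for an exact filelog match via `path + '.i'`, then falls back to a directory prefix match over the fncache. A minimal, self-contained sketch of that logic, assuming a plain set in place of the real fncache and skipping the on-disk `_exists` probe (`store_contains` is a hypothetical name for illustration only):

def store_contains(fncache, path):
    # exact match: a tracked file has a ".i" index under data/
    path = b"/".join((b"data", path))
    if path + b'.i' in fncache:
        return True
    # prefix match: any tracked entry under the directory counts
    if not path.endswith(b'/'):
        path += b'/'
    return any(e.startswith(path) for e in fncache)

assert store_contains({b'data/foo.i'}, b'foo')      # file match
assert store_contains({b'data/dir/foo.i'}, b'dir')  # directory match
assert not store_contains({b'data/foo.i'}, b'bar')  # no match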
@@ -1,150 +1,175 @@
 Test stream cloning while a revlog split happens
 ------------------------------------------------
 
 #testcases stream-bundle2-v2 stream-bundle2-v3
 
 #if stream-bundle2-v3
   $ cat << EOF >> $HGRCPATH
   > [experimental]
   > stream-v3 = yes
   > EOF
 #endif
 
 setup a repository for tests
 ----------------------------
 
   $ cat >> $HGRCPATH << EOF
   > [format]
   > # skip compression to make it easy to trigger a split
   > revlog-compression=none
+  > [phases]
+  > publish=no
   > EOF
 
   $ hg init server
   $ cd server
   $ file="some-file"
   $ printf '%20d' '1' > $file
   $ hg commit -Aqma
   $ printf '%1024d' '1' > $file
   $ hg commit -Aqmb
   $ printf '%20d' '1' > $file
   $ hg commit -Aqmc
 
 check the revlog is inline
 
   $ f -s .hg/store/data/some-file*
   .hg/store/data/some-file.i: size=1259
   $ hg debug-revlog-index some-file
   rev linkrev nodeid p1-nodeid p2-nodeid
   0 0 ed70cecbc103 000000000000 000000000000
   1 1 7241018db64c ed70cecbc103 000000000000
   2 2 fa1120531cc1 7241018db64c 000000000000
   $ cd ..
 
 setup synchronisation file
 
   $ HG_TEST_STREAM_WALKED_FILE_1="$TESTTMP/sync_file_walked_1"
   $ export HG_TEST_STREAM_WALKED_FILE_1
   $ HG_TEST_STREAM_WALKED_FILE_2="$TESTTMP/sync_file_walked_2"
   $ export HG_TEST_STREAM_WALKED_FILE_2
   $ HG_TEST_STREAM_WALKED_FILE_3="$TESTTMP/sync_file_walked_3"
   $ export HG_TEST_STREAM_WALKED_FILE_3
 
 
 Test stream-clone raced by a revlog-split
 =========================================
 
 Test stream-clone where the file is split right after the lock section is done
 
 Start the server
 
   $ hg serve -R server \
   > -p $HGPORT1 -d --error errors.log --pid-file=hg.pid \
   > --config extensions.stream_steps="$RUNTESTDIR/testlib/ext-stream-clone-steps.py"
   $ cat hg.pid >> $DAEMON_PIDS
 
 Start a client doing a streaming clone
 
-  $ (hg clone -q --stream -U http://localhost:$HGPORT1 clone-while-split > client.log 2>&1; touch "$HG_TEST_STREAM_WALKED_FILE_3") &
+  $ ( \
+  > hg clone --debug --stream -U http://localhost:$HGPORT1 \
+  > clone-while-split > client.log 2>&1; \
+  > touch "$HG_TEST_STREAM_WALKED_FILE_3" \
+  > ) &
 
 Wait for the server to be done collecting data
 
   $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_1
 
 trigger a split
 
   $ dd if=/dev/zero of=server/$file bs=1k count=128 > /dev/null 2>&1
   $ hg -R server ci -m "triggering a split" --config ui.timeout.warn=-1
 
 unlock the stream generation
 
   $ touch $HG_TEST_STREAM_WALKED_FILE_2
 
 wait for the client to be done cloning.
 
   $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_3
 
 Check everything is fine
 
   $ cat client.log
-  remote: abort: unexpected error: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !)
-  abort: pull failed on remote (known-bad-output !)
+  using http://localhost:$HGPORT1/
+  sending capabilities command
+  query 1; heads
+  sending batch command
+  streaming all changes
+  sending getbundle command
+  bundle2-input-bundle: with-transaction
+  bundle2-input-part: "stream2" (params: 3 mandatory) supported (stream-bundle2-v2 !)
+  bundle2-input-part: "stream3-exp" (params: 3 mandatory) supported (stream-bundle2-v3 !)
+  applying stream bundle
+  7 files to transfer, 2.11 KB of data
+  adding [s] data/some-file.i (1.23 KB)
+  adding [s] phaseroots (43 bytes)
+  adding [s] 00manifest.i (348 bytes)
+  adding [s] 00changelog.i (381 bytes)
+  adding [c] branch2-served (94 bytes)
+  adding [c] rbc-names-v1 (7 bytes)
+  adding [c] rbc-revs-v1 (24 bytes)
+  updating the branch cache
+  transferred 2.11 KB in * seconds (* */sec) (glob)
+  bundle2-input-part: total payload size 2268
+  bundle2-input-part: "listkeys" (params: 1 mandatory) supported
+  bundle2-input-bundle: 2 parts total
+  checking for updated bookmarks
+  updating the branch cache
+  (sent 3 HTTP requests and * bytes; received * bytes in responses) (glob)
   $ tail -2 errors.log
-  mercurial.error.Abort: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !)
-  (known-bad-output !)
   $ hg -R clone-while-split verify
-  checking changesets (missing-correct-output !)
-  checking manifests (missing-correct-output !)
-  crosschecking files in changesets and manifests (missing-correct-output !)
-  checking files (missing-correct-output !)
-  checking dirstate (missing-correct-output !)
-  checked 3 changesets with 3 changes to 1 files (missing-correct-output !)
-  abort: repository clone-while-split not found (known-bad-output !)
-  [255]
+  checking changesets
+  checking manifests
+  crosschecking files in changesets and manifests
+  checking files
+  checking dirstate
+  checked 3 changesets with 3 changes to 1 files
   $ hg -R clone-while-split tip
-  changeset: 2:dbd9854c38a6 (missing-correct-output !)
-  tag: tip (missing-correct-output !)
-  user: test (missing-correct-output !)
-  date: Thu Jan 01 00:00:00 1970 +0000 (missing-correct-output !)
-  summary: c (missing-correct-output !)
-  (missing-correct-output !)
-  abort: repository clone-while-split not found (known-bad-output !)
-  [255]
+  changeset: 2:dbd9854c38a6
+  tag: tip
+  user: test
+  date: Thu Jan 01 00:00:00 1970 +0000
+  summary: c
+  
   $ hg -R clone-while-split debug-revlog-index some-file
-  rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !)
-  0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !)
-  1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !)
-  2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !)
-  abort: repository clone-while-split not found (known-bad-output !)
-  [255]
+  rev linkrev nodeid p1-nodeid p2-nodeid
+  0 0 ed70cecbc103 000000000000 000000000000
+  1 1 7241018db64c ed70cecbc103 000000000000
+  2 2 fa1120531cc1 7241018db64c 000000000000
+  $ hg -R server phase --rev 'all()'
+  0: draft
+  1: draft
+  2: draft
+  3: draft
+  $ hg -R clone-while-split phase --rev 'all()'
+  0: draft
+  1: draft
+  2: draft
 
 subsequent pull work
 
   $ hg -R clone-while-split pull
-  pulling from http://localhost:$HGPORT1/ (missing-correct-output !)
-  searching for changes (missing-correct-output !)
-  adding changesets (missing-correct-output !)
-  adding manifests (missing-correct-output !)
-  adding file changes (missing-correct-output !)
-  added 1 changesets with 1 changes to 1 files (missing-correct-output !)
-  new changesets df05c6cb1406 (missing-correct-output !)
-  (run 'hg update' to get a working copy) (missing-correct-output !)
-  abort: repository clone-while-split not found (known-bad-output !)
-  [255]
+  pulling from http://localhost:$HGPORT1/
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 1 changesets with 1 changes to 1 files
+  new changesets df05c6cb1406 (1 drafts)
+  (run 'hg update' to get a working copy)
 
   $ hg -R clone-while-split debug-revlog-index some-file
-  rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !)
-  0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !)
-  1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !)
-  2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !)
-  3 3 a631378adaa3 fa1120531cc1 000000000000 (missing-correct-output !)
-  abort: repository clone-while-split not found (known-bad-output !)
-  [255]
+  rev linkrev nodeid p1-nodeid p2-nodeid
+  0 0 ed70cecbc103 000000000000 000000000000
+  1 1 7241018db64c ed70cecbc103 000000000000
+  2 2 fa1120531cc1 7241018db64c 000000000000
+  3 3 a631378adaa3 fa1120531cc1 000000000000
   $ hg -R clone-while-split verify
-  checking changesets (missing-correct-output !)
-  checking manifests (missing-correct-output !)
-  crosschecking files in changesets and manifests (missing-correct-output !)
-  checking files (missing-correct-output !)
-  checking dirstate (missing-correct-output !)
-  checked 4 changesets with 4 changes to 1 files (missing-correct-output !)
-  abort: repository clone-while-split not found (known-bad-output !)
-  [255]
+  checking changesets
+  checking manifests
+  crosschecking files in changesets and manifests
+  checking files
+  checking dirstate
+  checked 4 changesets with 4 changes to 1 files
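
The synchronisation above relies on plain marker files shared between the server and the racing client. A minimal sketch of what such a wait helper could look like, assuming a timeout in seconds and a path to poll for; this is illustrative only, not the actual `testlib/wait-on-file` script:

import os
import sys
import time

def wait_on_file(timeout, path, poll_interval=0.01):
    # Poll until `path` exists or `timeout` seconds elapse.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if os.path.exists(path):
            return True
        time.sleep(poll_interval)
    return False

if __name__ == '__main__':
    # usage: wait_on_file.py TIMEOUT PATH; exits non-zero on timeout
    ok = wait_on_file(float(sys.argv[1]), sys.argv[2])
    sys.exit(0 if ok else 1)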