stream-clone: implement dedicated `get_streams` method for revlog
marmoute
r51533:9caa860d default
@@ -1,3410 +1,3478 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to silence pyflakes warnings;
# we need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

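
# --- Illustrative sketch (editor's addition, not part of revlog.py) ---
# The flag-processor shape used above: a processor is a (read, write,
# raw) triple, where read/write return (text, validatehash) and raw
# returns whether the rawtext can be hash-checked as-is. The names below
# are hypothetical; real extensions hand their processors to this module
# via the b'flagprocessors' opener option consumed in _init_opts().
#
#     def examplereadprocessor(rl, text):
#         return text, False
#
#     def examplewriteprocessor(rl, text):
#         return text, False
#
#     def examplerawprocessor(rl, text):
#         return False
#
#     exampleprocessor = (
#         examplereadprocessor,
#         examplewriteprocessor,
#         examplerawprocessor,
#     )
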
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'

class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must be reliably set by normal code, but
        that test, debug, or performance-measurement code might not set it to
        an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
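
    # --- Illustrative sketch (editor's addition, not part of revlog.py) ---
    # The kind of mapping _init_opts() expects to find on `opener.options`.
    # The keys are the ones consumed above; the values shown are examples,
    # not Mercurial's defaults.
    #
    #     opener.options = {
    #         b'revlogv1': True,
    #         b'generaldelta': True,
    #         b'sparse-revlog': True,
    #         b'chunkcachesize': 65536,  # must be a power of two
    #         b'compengine': b'zstd',
    #         b'zstd.level': 3,
    #         b'mmapindexthreshold': 1000000,
    #     }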

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def get_streams(self, max_linkrev):
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations,
            # since it only traverses commits created during the current
            # streaming pull operation.
            #
            # If this becomes a problem, a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, by having a way to
        # pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]
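
    # --- Illustrative sketch (editor's addition, not part of revlog.py) ---
    # How a stream-clone producer might drain the streams returned by
    # get_streams(). Each entry is a (filename, generator, size) triple;
    # the generator is already primed (the initial `yield None` has been
    # consumed), so iterating it yields raw byte chunks. `rl` and
    # `max_linkrev` are assumed to come from the caller.
    #
    #     for name, stream, expected_size in rl.get_streams(max_linkrev):
    #         sent = 0
    #         for chunk in stream:
    #             sent += len(chunk)
    #         assert sent == expected_size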

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
            entry_point = b'%s.i.s' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog
        classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
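
    # --- Illustrative sketch (editor's addition, not part of revlog.py) ---
    # start() and flags() above unpack the first index-entry field, which
    # packs a 48-bit data-file offset and 16 bits of flags into a single
    # integer. A hypothetical round-trip, with made-up values:
    #
    #     offset, rev_flags = 1234, REVIDX_ISCENSORED
    #     entry0 = (offset << 16) | rev_flags
    #     assert entry0 >> 16 == offset
    #     assert entry0 & 0xFFFF == rev_flags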
861
929
862 def length(self, rev):
930 def length(self, rev):
863 return self.index[rev][1]
931 return self.index[rev][1]
864
932
865 def sidedata_length(self, rev):
933 def sidedata_length(self, rev):
866 if not self.hassidedata:
934 if not self.hassidedata:
867 return 0
935 return 0
868 return self.index[rev][9]
936 return self.index[rev][9]
869
937
870 def rawsize(self, rev):
938 def rawsize(self, rev):
871 """return the length of the uncompressed text for a given revision"""
939 """return the length of the uncompressed text for a given revision"""
872 l = self.index[rev][2]
940 l = self.index[rev][2]
873 if l >= 0:
941 if l >= 0:
874 return l
942 return l
875
943
876 t = self.rawdata(rev)
944 t = self.rawdata(rev)
877 return len(t)
945 return len(t)
878
946
879 def size(self, rev):
947 def size(self, rev):
880 """length of non-raw text (processed by a "read" flag processor)"""
948 """length of non-raw text (processed by a "read" flag processor)"""
881 # fast path: if no "read" flag processor could change the content,
949 # fast path: if no "read" flag processor could change the content,
882 # size is rawsize. note: ELLIPSIS is known to not change the content.
950 # size is rawsize. note: ELLIPSIS is known to not change the content.
883 flags = self.flags(rev)
951 flags = self.flags(rev)
884 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
952 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
885 return self.rawsize(rev)
953 return self.rawsize(rev)
886
954
887 return len(self.revision(rev))
955 return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank
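
    # Editor's sketch of the definition above (hypothetical helper, not part
    # of the revlog API): rank(r) == len(ancestors(r)) with `r` included, so
    # in a purely linear history revision N has rank N + 1.
    #
    #   def slow_rank(rl, rev):
    #       seen = {rev}
    #       stack = [rev]
    #       while stack:
    #           for p in rl.parentrevs(stack.pop()):
    #               if p != nullrev and p not in seen:
    #                   seen.add(p)
    #                   stack.append(p)
    #       return len(seen)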

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base
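
    # Editor's note: the base found this way should match the first element
    # of the delta chain computed by _deltachain() below, i.e. (assuming
    # `rl` is a revlog instance):
    #
    #   chain, _stopped = rl._deltachain(rev)
    #   assert rl.chainbase(rev) == chain[0]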

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
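
    # Editor's note: chainlen(rev) counts the deltas between `rev` and its
    # chain base, so a full snapshot has chain length 0. Equivalently
    # (assuming `rl` is a revlog instance):
    #
    #   chain, _stopped = rl._deltachain(rev)
    #   assert rl.chainlen(rev) == len(chain) - 1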

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
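
    # Editor's sketch of what the chain means (assuming `rl` is a revlog
    # instance and the chain base is a full snapshot): the raw text of `rev`
    # is the base chunk with each later delta applied in order.
    #
    #   chain, stopped = rl._deltachain(rev)
    #   text = rl._chunk(chain[0])                  # full text at the base
    #   for r in chain[1:]:
    #       text = mdiff.patch(text, rl._chunk(r))  # apply one binary delta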

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            missing.add(r)
            for p in self.parentrevs(r):
                if p not in has:
                    visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
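
    # Editor's illustration of the revset identity above (hypothetical nodes
    # `c` and `h`): `has` plays the role of ::common and `missing` the role
    # of (::heads) - (::common).
    #
    #   has, missing = rl.findcommonmissing([c], [h])
    #   # every parent of a missing node is itself missing or in `has`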

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)
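
    # Editor's sketch of the incremental use this enables (hypothetical rev
    # sets; `missingancestors` is the same call findmissingrevs below makes):
    #
    #   inc = rl.incrementalmissingrevs(common=[c1, c2])
    #   first = inc.missingancestors(heads_a)   # one batch of revisions
    #   later = inc.missingancestors(heads_b)   # reuses the earlier state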

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
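
    # Editor's sketch of the contract (hypothetical nodes `r` and `h`):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([r], [h])
    #   # nodes is topologically sorted; outroots is the subset of [r] and
    #   # outheads the subset of [h] that actually appear in nodes.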

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
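
    # Editor's note on the fast paths above: revision numbers only grow
    # child-ward, so an ancestor always has a rev number no larger than its
    # descendants; `a > b` can therefore answer False without any graph
    # traversal, and only the remaining case needs reachableroots().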

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass
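
    # Editor's illustration of the id forms _match() resolves (values are
    # hypothetical):
    #
    #   rl._match(5)             # an integer revision number
    #   rl._match(b'-1')         # str(rev); negatives count from the end
    #   rl._match(binary_node)   # a full nodelen-byte binary nodeid
    #   rl._match(full_hex_id)   # a full 2*nodelen hex nodeid
    #
    # Anything else falls through to None and is left to _partialmatch().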

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
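
    # Editor's usage sketch (hypothetical `rl` and `node`): minlength only
    # sets a lower bound; the result may be longer when a shorter prefix is
    # ambiguous or could be mistaken for the all-'f' wdir id.
    #
    #   prefix = rl.shortest(node, minlength=4)
    #   assert hex(node).startswith(prefix) and len(prefix) >= 4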

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)
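
    # Editor's sketch of consuming the segment per the docstring (non-inline
    # case; `rl` and the ascending `revs` list are hypothetical):
    #
    #   offset, data = rl._getsegmentforrevs(revs[0], revs[-1])
    #   for rev in revs:
    #       begin = rl.start(rev) - offset
    #       chunk = data[begin:begin + rl.length(rev)]  # still compressed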

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l
1774
1842
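    # Illustrative sketch only (hedged): a typical caller fetches the delta
    # chain of a revision and feeds it through ``_chunks`` in one batch, e.g.
    #
    #   chain, stopped = self._deltachain(rev)
    #   bins = self._chunks(chain)  # one sliced read + per-rev decompression
    #
    # which is what ``_rawtext`` below does; the batched read is the whole
    # point of this method compared to repeated ``_chunk`` calls.
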
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

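    # Hedged sketch (not part of the module API): walking a full delta chain
    # by hand with ``deltaparent`` would look like
    #
    #   r = rev
    #   while r != nullrev:
    #       ...  # rev ``r`` contributes a delta (or the base snapshot)
    #       r = self.deltaparent(r)
    #
    # ``_deltachain`` provides an optimized version of this walk.
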
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

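    # The fast path above matters: when ``rev2`` is already stored as a delta
    # against ``rev1``, the stored chunk *is* the answer and no text needs to
    # be reconstructed. A hedged cost sketch:
    #
    #   revdiff(deltaparent(r), r)  # roughly one chunk read
    #   revdiff(r1, r2)             # otherwise two full reconstructions
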
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

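    # Hedged summary of the read pipeline implemented above (names below are
    # the local variables of ``_revisiondata``, nothing new):
    #
    #   rawtext = _rawtext(node)             # delta chain -> stored bytes
    #   text    = processflagsread(rawtext)  # flag processors (censor, ...)
    #   checkhash(text, node)                # sha1 integrity, unless validated
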
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext # let us have a chance to free memory early
        return (rev, rawtext, False)

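    # Hedged sketch: the reconstruction above is conceptually the fold
    #
    #   text = bins[0]
    #   for delta in bins[1:]:
    #       text = mdiff.patch(text, delta)
    #
    # ``mdiff.patches`` performs the same application in a single call, which
    # is why the chain is materialized as a list of chunks first.
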
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

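    # For reference (hedged paraphrase of ``storageutil.hashrevisionsha1``):
    # the default node id is a SHA-1 over the sorted parent ids followed by
    # the text, roughly
    #
    #   s = hashlib.sha1(min(p1, p2))
    #   s.update(max(p1, p2))
    #   s.update(text)
    #   node = s.digest()
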
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._indexfile + b'.s'
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0) # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional: reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

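    # Layout reminder (hedged, derived from the code above): an inline revlog
    # interleaves index entries and data in one ``.i`` file,
    #
    #   [entry 0][data 0][entry 1][data 1]...
    #
    # while the conversion rewrites this as a pure ``.i`` index plus a ``.d``
    # data file, which is why every chunk is copied before the index entries
    # are re-emitted without FLAG_INLINE_DATA.
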
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

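    # Hedged usage sketch: every mutation goes through this context manager,
    # typically as
    #
    #   with self._writing(transaction):
    #       self._addrevision(...)
    #
    # so that index/data/sidedata handles are opened once, positioned at the
    # docket-recorded ends, and reliably closed (index last) on exit.
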
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

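    # Header convention implied by compress()/decompress() (hedged summary):
    #
    #   b''   - the compressor already embedded its own header (e.g. b'x'
    #           for zlib)
    #   b'u'  - stored uncompressed, prefixed so it cannot be misread
    #   b'\0' - raw data that happens to start with NUL needs no b'u' prefix
    #
    # decompress() below dispatches on that first byte.
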
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before it is changed by flag processors, which
            # is the non-raw size. use revlog explicitly to avoid filelog's
            # extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else: # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

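        # Hedged note: the "rank" computed above is the size of the revision's
        # ancestor set, itself included, so for a linear history
        #
        #   rank(root) == 1, rank(child) == rank(parent) + 1
        #
        # and for merges the missing-revs walk adds the ancestors reachable
        # only through the smaller parent.
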
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

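    # Hedged cross-reference: the positional index fields read elsewhere in
    # this file map onto the keyword arguments above, roughly
    #
    #   entry[3] -> data_delta_base    entry[8]  -> sidedata_offset
    #   entry[5] -> parent_rev_1       entry[9]  -> sidedata_compressed_length
    #   entry[6] -> parent_rev_2       entry[10] -> data_compression_mode
    #                                  entry[11] -> sidedata_compression_mode
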
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
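                        # Sketch of the assumption behind this check: a
                        # full-replacement delta is a single hunk whose
                        # 12-byte b">lll" header (start, end, new length) is
                        # exactly what mdiff.replacediffheader() builds for
                        # the old and new sizes computed below.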
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether deltas are forcibly computed against both
        parents for merges. When left unset, the destination revlog's existing
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
@@ -1,1159 +1,1216 b''
# store.py - repository store handling for Mercurial
#
# Copyright 2008 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import functools
import os
import re
import stat
from typing import Generator, List

from .i18n import _
from .pycompat import getattr
from .thirdparty import attr
from .node import hex
from . import (
    changelog,
    error,
    filelog,
    manifest,
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)
from .utils import hashutil

parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
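# For scale (illustrative): a 10 MB fncache is consumed in ten reads of
# roughly one megabyte each rather than a single large allocation.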


def _match_tracked_entry(entry, matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True"""

    if matcher is None:
        return True
    if entry.is_filelog:
        return matcher(entry.target_id)
    elif entry.is_manifestlog:
        return matcher.visitdir(entry.target_id.rstrip(b'/'))
    raise error.ProgrammingError(b"cannot process entry %r" % entry)
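# Illustrative examples (hypothetical paths): a filelog entry tracking
# b'foo/bar.txt' is kept iff matcher(b'foo/bar.txt') is True, while a
# manifestlog entry for the directory b'foo/' is kept iff the matcher is
# willing to visit b'foo'.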


# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def _encodedir(path):
    """
    >>> _encodedir(b'data/foo.i')
    'data/foo.i'
    >>> _encodedir(b'data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    """
    return (
        path.replace(b".hg/", b".hg.hg/")
        .replace(b".i/", b".i.hg/")
        .replace(b".d/", b".d.hg/")
    )


encodedir = getattr(parsers, 'encodedir', _encodedir)
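# Note: policy.importmod may provide a C implementation of encodedir; the
# pure-Python _encodedir above is only the fallback used when the compiled
# parsers module does not expose one.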


def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" not in path:
        return path
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )


def _reserved():
    """characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by encodefunctions
    """
    winreserved = [ord(x) for x in u'\\:*?"<>|']
    for x in range(32):
        yield x
    for x in range(126, 256):
        yield x
    for x in winreserved:
        yield x
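    # Illustrative check of the yielded set (not a doctest):
    #
    #   reserved = set(_reserved())
    #   assert ord('?') in reserved and ord('|') in reserved
    #   assert ord('a') not in reserved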


def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )


_encodefname, _decodefname = _buildencodefun()


def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    return _encodefname(encodedir(s))


def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    return decodedir(_decodefname(s))


def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode


lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)


def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
266
266
267
267
268 _maxstorepathlen = 120
268 _maxstorepathlen = 120
269 _dirprefixlen = 8
269 _dirprefixlen = 8
270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271
271
272
272
273 def _hashencode(path, dotencode):
273 def _hashencode(path, dotencode):
274 digest = hex(hashutil.sha1(path).digest())
274 digest = hex(hashutil.sha1(path).digest())
275 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
276 parts = _auxencode(le, dotencode)
276 parts = _auxencode(le, dotencode)
277 basename = parts[-1]
277 basename = parts[-1]
278 _root, ext = os.path.splitext(basename)
278 _root, ext = os.path.splitext(basename)
279 sdirs = []
279 sdirs = []
280 sdirslen = 0
280 sdirslen = 0
281 for p in parts[:-1]:
281 for p in parts[:-1]:
282 d = p[:_dirprefixlen]
282 d = p[:_dirprefixlen]
283 if d[-1] in b'. ':
283 if d[-1] in b'. ':
284 # Windows can't access dirs ending in period or space
284 # Windows can't access dirs ending in period or space
285 d = d[:-1] + b'_'
285 d = d[:-1] + b'_'
286 if sdirslen == 0:
286 if sdirslen == 0:
287 t = len(d)
287 t = len(d)
288 else:
288 else:
289 t = sdirslen + 1 + len(d)
289 t = sdirslen + 1 + len(d)
290 if t > _maxshortdirslen:
290 if t > _maxshortdirslen:
291 break
291 break
292 sdirs.append(d)
292 sdirs.append(d)
293 sdirslen = t
293 sdirslen = t
294 dirs = b'/'.join(sdirs)
294 dirs = b'/'.join(sdirs)
295 if len(dirs) > 0:
295 if len(dirs) > 0:
296 dirs += b'/'
296 dirs += b'/'
297 res = b'dh/' + dirs + digest + ext
297 res = b'dh/' + dirs + digest + ext
298 spaceleft = _maxstorepathlen - len(res)
298 spaceleft = _maxstorepathlen - len(res)
299 if spaceleft > 0:
299 if spaceleft > 0:
300 filler = basename[:spaceleft]
300 filler = basename[:spaceleft]
301 res = b'dh/' + dirs + filler + digest + ext
301 res = b'dh/' + dirs + filler + digest + ext
302 return res
302 return res
303
303
304
304
305 def _hybridencode(path, dotencode):
305 def _hybridencode(path, dotencode):
306 """encodes path with a length limit
306 """encodes path with a length limit
307
307
308 Encodes all paths that begin with 'data/', according to the following.
308 Encodes all paths that begin with 'data/', according to the following.
309
309
310 Default encoding (reversible):
310 Default encoding (reversible):
311
311
312 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
313 characters are encoded as '~xx', where xx is the two digit hex code
313 characters are encoded as '~xx', where xx is the two digit hex code
314 of the character (see encodefilename).
314 of the character (see encodefilename).
315 Relevant path components consisting of Windows reserved filenames are
315 Relevant path components consisting of Windows reserved filenames are
316 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
317
317
318 Hashed encoding (not reversible):
318 Hashed encoding (not reversible):
319
319
320 If the default-encoded path is longer than _maxstorepathlen, a
320 If the default-encoded path is longer than _maxstorepathlen, a
321 non-reversible hybrid hashing of the path is done instead.
321 non-reversible hybrid hashing of the path is done instead.
322 This encoding uses up to _dirprefixlen characters of all directory
322 This encoding uses up to _dirprefixlen characters of all directory
323 levels of the lowerencoded path, but not more levels than can fit into
323 levels of the lowerencoded path, but not more levels than can fit into
324 _maxshortdirslen.
324 _maxshortdirslen.
325 Then follows the filler followed by the sha digest of the full path.
325 Then follows the filler followed by the sha digest of the full path.
326 The filler is the beginning of the basename of the lowerencoded path
326 The filler is the beginning of the basename of the lowerencoded path
327 (the basename is everything after the last path separator). The filler
327 (the basename is everything after the last path separator). The filler
328 is as long as possible, filling in characters from the basename until
328 is as long as possible, filling in characters from the basename until
329 the encoded path has _maxstorepathlen characters (or all chars of the
329 the encoded path has _maxstorepathlen characters (or all chars of the
330 basename have been taken).
330 basename have been taken).
331 The extension (e.g. '.i' or '.d') is preserved.
331 The extension (e.g. '.i' or '.d') is preserved.
332
332
333 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
334 encoding was used.
334 encoding was used.
335 """
335 """
336 path = encodedir(path)
336 path = encodedir(path)
337 ef = _encodefname(path).split(b'/')
337 ef = _encodefname(path).split(b'/')
338 res = b'/'.join(_auxencode(ef, dotencode))
338 res = b'/'.join(_auxencode(ef, dotencode))
339 if len(res) > _maxstorepathlen:
339 if len(res) > _maxstorepathlen:
340 res = _hashencode(path, dotencode)
340 res = _hashencode(path, dotencode)
341 return res
341 return res
342
342
343
343
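To make the hashed fallback concrete, here is a minimal standalone sketch of the `dh/` layout described in the docstring above. The helper name and sample path are hypothetical, and the real implementation additionally lower-encodes and aux-encodes each component and rewrites trailing periods/spaces in directory prefixes:

import hashlib
import os

_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def sketch_hashencode(path):
    # hex sha1 digest of the full path, always part of the result
    digest = hashlib.sha1(path).hexdigest().encode('ascii')
    parts = path[5:].split(b'/')  # drop the 'data/' prefix
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]
    # keep up to _dirprefixlen chars per directory level, capped overall
    sdirs, total = [], 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        t = len(d) if not sdirs else total + 1 + len(d)
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        total = t
    dirs = b'/'.join(sdirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with the start of the basename up to _maxstorepathlen
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res

print(sketch_hashencode(b'data/' + b'long-directory-name/' * 8 + b'file.i'))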
344 def _pathencode(path):
344 def _pathencode(path):
345 de = encodedir(path)
345 de = encodedir(path)
346 if len(path) > _maxstorepathlen:
346 if len(path) > _maxstorepathlen:
347 return _hashencode(de, True)
347 return _hashencode(de, True)
348 ef = _encodefname(de).split(b'/')
348 ef = _encodefname(de).split(b'/')
349 res = b'/'.join(_auxencode(ef, True))
349 res = b'/'.join(_auxencode(ef, True))
350 if len(res) > _maxstorepathlen:
350 if len(res) > _maxstorepathlen:
351 return _hashencode(de, True)
351 return _hashencode(de, True)
352 return res
352 return res
353
353
354
354
355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356
356
357
357
358 def _plainhybridencode(f):
358 def _plainhybridencode(f):
359 return _hybridencode(f, False)
359 return _hybridencode(f, False)
360
360
361
361
362 def _calcmode(vfs):
362 def _calcmode(vfs):
363 try:
363 try:
364 # files in .hg/ will be created using this mode
364 # files in .hg/ will be created using this mode
365 mode = vfs.stat().st_mode
365 mode = vfs.stat().st_mode
366 # avoid some useless chmods
366 # avoid some useless chmods
367 if (0o777 & ~util.umask) == (0o777 & mode):
367 if (0o777 & ~util.umask) == (0o777 & mode):
368 mode = None
368 mode = None
369 except OSError:
369 except OSError:
370 mode = None
370 mode = None
371 return mode
371 return mode
372
372
373
373
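The umask comparison in `_calcmode` simply checks whether the store directory's permission bits already match what newly created files would get by default; if so, no explicit chmod is needed. A tiny illustration with made-up values:

umask = 0o022      # hypothetical process umask
st_mode = 0o40755  # hypothetical directory mode (drwxr-xr-x)

# new files default to 0o777 & ~umask; when the directory already has
# exactly those permission bits, extra chmod calls would be useless
if (0o777 & ~umask) == (0o777 & st_mode):
    mode = None    # nothing to enforce
else:
    mode = st_mode
print(mode)        # -> None here, since 0o755 == 0o755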
374 _data = [
374 _data = [
375 b'bookmarks',
375 b'bookmarks',
376 b'narrowspec',
376 b'narrowspec',
377 b'data',
377 b'data',
378 b'meta',
378 b'meta',
379 b'00manifest.d',
379 b'00manifest.d',
380 b'00manifest.i',
380 b'00manifest.i',
381 b'00changelog.d',
381 b'00changelog.d',
382 b'00changelog.i',
382 b'00changelog.i',
383 b'phaseroots',
383 b'phaseroots',
384 b'obsstore',
384 b'obsstore',
385 b'requires',
385 b'requires',
386 ]
386 ]
387
387
388 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 REVLOG_FILES_MAIN_EXT = (b'.i',)
389 REVLOG_FILES_OTHER_EXT = (
389 REVLOG_FILES_OTHER_EXT = (
390 b'.idx',
390 b'.idx',
391 b'.d',
391 b'.d',
392 b'.dat',
392 b'.dat',
393 b'.n',
393 b'.n',
394 b'.nd',
394 b'.nd',
395 b'.sda',
395 b'.sda',
396 )
396 )
397 # file extensions that also use a `-SOMELONGIDHASH.ext` form
397 # file extensions that also use a `-SOMELONGIDHASH.ext` form
398 REVLOG_FILES_LONG_EXT = (
398 REVLOG_FILES_LONG_EXT = (
399 b'.nd',
399 b'.nd',
400 b'.idx',
400 b'.idx',
401 b'.dat',
401 b'.dat',
402 b'.sda',
402 b'.sda',
403 )
403 )
404 # files that are "volatile" and might change between listing and streaming
404 # files that are "volatile" and might change between listing and streaming
405 #
405 #
406 # note: the ".nd" files are nodemap data and won't "change", but they might be
406 # note: the ".nd" files are nodemap data and won't "change", but they might be
407 # deleted.
407 # deleted.
408 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
409
409
410 # some exceptions to the above matching
410 # some exceptions to the above matching
411 #
411 #
412 # XXX This is currently not in use because of issue6542
412 # XXX This is currently not in use because of issue6542
413 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414
414
415
415
416 def is_revlog(f, kind, st):
416 def is_revlog(f, kind, st):
417 if kind != stat.S_IFREG:
417 if kind != stat.S_IFREG:
418 return None
418 return None
419 return revlog_type(f)
419 return revlog_type(f)
420
420
421
421
422 def revlog_type(f):
422 def revlog_type(f):
423 # XXX we need to filter `undo.` files created by the transaction here; however,
423 # XXX we need to filter `undo.` files created by the transaction here; however,
424 # being naive about it also filters revlogs for `undo.*` files, leading to
424 # being naive about it also filters revlogs for `undo.*` files, leading to
425 # issue6542. So we no longer use EXCLUDED.
425 # issue6542. So we no longer use EXCLUDED.
426 if f.endswith(REVLOG_FILES_MAIN_EXT):
426 if f.endswith(REVLOG_FILES_MAIN_EXT):
427 return FILEFLAGS_REVLOG_MAIN
427 return FILEFLAGS_REVLOG_MAIN
428 elif f.endswith(REVLOG_FILES_OTHER_EXT):
428 elif f.endswith(REVLOG_FILES_OTHER_EXT):
429 t = FILETYPE_FILELOG_OTHER
429 t = FILETYPE_FILELOG_OTHER
430 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
430 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
431 t |= FILEFLAGS_VOLATILE
431 t |= FILEFLAGS_VOLATILE
432 return t
432 return t
433 return None
433 return None
434
434
435
435
436 # the file is part of changelog data
436 # the file is part of changelog data
437 FILEFLAGS_CHANGELOG = 1 << 13
437 FILEFLAGS_CHANGELOG = 1 << 13
438 # the file is part of manifest data
438 # the file is part of manifest data
439 FILEFLAGS_MANIFESTLOG = 1 << 12
439 FILEFLAGS_MANIFESTLOG = 1 << 12
440 # the file is part of filelog data
440 # the file is part of filelog data
441 FILEFLAGS_FILELOG = 1 << 11
441 FILEFLAGS_FILELOG = 1 << 11
442 # files that are not directly part of a revlog
442 # files that are not directly part of a revlog
443 FILEFLAGS_OTHER = 1 << 10
443 FILEFLAGS_OTHER = 1 << 10
444
444
445 # the main entry point for a revlog
445 # the main entry point for a revlog
446 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 FILEFLAGS_REVLOG_MAIN = 1 << 1
447 # a secondary file for a revlog
447 # a secondary file for a revlog
448 FILEFLAGS_REVLOG_OTHER = 1 << 0
448 FILEFLAGS_REVLOG_OTHER = 1 << 0
449
449
450 # files that are "volatile" and might change between listing and streaming
450 # files that are "volatile" and might change between listing and streaming
451 FILEFLAGS_VOLATILE = 1 << 20
451 FILEFLAGS_VOLATILE = 1 << 20
452
452
453 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
454 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
455 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
456 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
457 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
458 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
459 FILETYPE_OTHER = FILEFLAGS_OTHER
459 FILETYPE_OTHER = FILEFLAGS_OTHER
460
460
461
461
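These constants are independent bit flags, so a single integer both classifies a file (changelog/manifest/filelog, main/other) and can be tested with bitwise masks. A minimal sketch, with the constants copied from above:

FILEFLAGS_CHANGELOG = 1 << 13
FILEFLAGS_FILELOG = 1 << 11
FILEFLAGS_REVLOG_OTHER = 1 << 0
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER

# e.g. a '.nd' companion of a filelog is both "other" and volatile
t = FILETYPE_FILELOG_OTHER | FILEFLAGS_VOLATILE
assert t & FILEFLAGS_FILELOG        # belongs to a filelog
assert t & FILEFLAGS_VOLATILE       # may vanish between list and stream
assert not t & FILEFLAGS_CHANGELOG  # not changelog data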
462 @attr.s(slots=True)
462 @attr.s(slots=True)
463 class StoreFile:
463 class StoreFile:
464 """a file matching a store entry"""
464 """a file matching a store entry"""
465
465
466 unencoded_path = attr.ib()
466 unencoded_path = attr.ib()
467 _file_size = attr.ib(default=None)
467 _file_size = attr.ib(default=None)
468 is_volatile = attr.ib(default=False)
468 is_volatile = attr.ib(default=False)
469
469
470 def file_size(self, vfs):
470 def file_size(self, vfs):
471 if self._file_size is None:
471 if self._file_size is None:
472 if vfs is None:
472 if vfs is None:
473 msg = b"calling vfs-less file_size without prior call: %s"
473 msg = b"calling vfs-less file_size without prior call: %s"
474 msg %= self.unencoded_path
474 msg %= self.unencoded_path
475 raise error.ProgrammingError(msg)
475 raise error.ProgrammingError(msg)
476 try:
476 try:
477 self._file_size = vfs.stat(self.unencoded_path).st_size
477 self._file_size = vfs.stat(self.unencoded_path).st_size
478 except FileNotFoundError:
478 except FileNotFoundError:
479 self._file_size = 0
479 self._file_size = 0
480 return self._file_size
480 return self._file_size
481
481
482 def get_stream(self, vfs, copies):
482 def get_stream(self, vfs, copies):
483 """return data "stream" information for this file
483 """return data "stream" information for this file
484
484
485 (unencoded_file_path, content_iterator, content_size)
485 (unencoded_file_path, content_iterator, content_size)
486 """
486 """
487 size = self.file_size(None)
487 size = self.file_size(None)
488
488
489 def get_stream():
489 def get_stream():
490 actual_path = copies[vfs.join(self.unencoded_path)]
490 actual_path = copies[vfs.join(self.unencoded_path)]
491 with open(actual_path, 'rb') as fp:
491 with open(actual_path, 'rb') as fp:
492 yield None # ready to stream
492 yield None # ready to stream
493 if size <= 65536:
493 if size <= 65536:
494 yield fp.read(size)
494 yield fp.read(size)
495 else:
495 else:
496 yield from util.filechunkiter(fp, limit=size)
496 yield from util.filechunkiter(fp, limit=size)
497
497
498 s = get_stream()
498 s = get_stream()
499 next(s)
499 next(s)
500 return (self.unencoded_path, s, size)
500 return (self.unencoded_path, s, size)
501
501
502
502
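`get_stream` primes the inner generator with `next(s)` so that the source file is opened, and any open error raised, before the tuple is handed out, while the actual reads stay lazy. A self-contained sketch of the same priming pattern, using a temporary file for illustration:

import tempfile

def primed_reader(path, size, chunk=65536):
    with open(path, 'rb') as fp:
        yield None  # the file is now open; resume to stream content
        remaining = size
        while remaining > 0:
            data = fp.read(min(chunk, remaining))
            if not data:
                break
            remaining -= len(data)
            yield data

with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b'x' * 100)
s = primed_reader(tmp.name, 100)
next(s)  # prime: open errors surface here, before streaming starts
print(sum(len(c) for c in s))  # -> 100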
503 @attr.s(slots=True, init=False)
503 @attr.s(slots=True, init=False)
504 class BaseStoreEntry:
504 class BaseStoreEntry:
505 """An entry in the store
505 """An entry in the store
506
506
507 This is returned by `store.walk` and represents some data in the store."""
507 This is returned by `store.walk` and represents some data in the store."""
508
508
509 def files(self) -> List[StoreFile]:
509 def files(self) -> List[StoreFile]:
510 raise NotImplementedError
510 raise NotImplementedError
511
511
512 def get_streams(self, vfs, copies=None):
512 def get_streams(
513 self,
514 repo=None,
515 vfs=None,
516 copies=None,
517 max_changeset=None,
518 ):
513 """return a list of data stream associated to files for this entry
519 """return a list of data stream associated to files for this entry
514
520
515 return [(unencoded_file_path, content_iterator, content_size), …]
521 return [(unencoded_file_path, content_iterator, content_size), …]
516 """
522 """
517 assert vfs is not None
523 assert vfs is not None
518 return [f.get_stream(vfs, copies) for f in self.files()]
524 return [f.get_stream(vfs, copies) for f in self.files()]
519
525
520
526
521 @attr.s(slots=True, init=False)
527 @attr.s(slots=True, init=False)
522 class SimpleStoreEntry(BaseStoreEntry):
528 class SimpleStoreEntry(BaseStoreEntry):
523 """A generic entry in the store"""
529 """A generic entry in the store"""
524
530
525 is_revlog = False
531 is_revlog = False
526
532
527 _entry_path = attr.ib()
533 _entry_path = attr.ib()
528 _is_volatile = attr.ib(default=False)
534 _is_volatile = attr.ib(default=False)
529 _file_size = attr.ib(default=None)
535 _file_size = attr.ib(default=None)
530 _files = attr.ib(default=None)
536 _files = attr.ib(default=None)
531
537
532 def __init__(
538 def __init__(
533 self,
539 self,
534 entry_path,
540 entry_path,
535 is_volatile=False,
541 is_volatile=False,
536 file_size=None,
542 file_size=None,
537 ):
543 ):
538 super().__init__()
544 super().__init__()
539 self._entry_path = entry_path
545 self._entry_path = entry_path
540 self._is_volatile = is_volatile
546 self._is_volatile = is_volatile
541 self._file_size = file_size
547 self._file_size = file_size
542 self._files = None
548 self._files = None
543
549
544 def files(self) -> List[StoreFile]:
550 def files(self) -> List[StoreFile]:
545 if self._files is None:
551 if self._files is None:
546 self._files = [
552 self._files = [
547 StoreFile(
553 StoreFile(
548 unencoded_path=self._entry_path,
554 unencoded_path=self._entry_path,
549 file_size=self._file_size,
555 file_size=self._file_size,
550 is_volatile=self._is_volatile,
556 is_volatile=self._is_volatile,
551 )
557 )
552 ]
558 ]
553 return self._files
559 return self._files
554
560
555
561
556 @attr.s(slots=True, init=False)
562 @attr.s(slots=True, init=False)
557 class RevlogStoreEntry(BaseStoreEntry):
563 class RevlogStoreEntry(BaseStoreEntry):
558 """A revlog entry in the store"""
564 """A revlog entry in the store"""
559
565
560 is_revlog = True
566 is_revlog = True
561
567
562 revlog_type = attr.ib(default=None)
568 revlog_type = attr.ib(default=None)
563 target_id = attr.ib(default=None)
569 target_id = attr.ib(default=None)
564 _path_prefix = attr.ib(default=None)
570 _path_prefix = attr.ib(default=None)
565 _details = attr.ib(default=None)
571 _details = attr.ib(default=None)
566 _files = attr.ib(default=None)
572 _files = attr.ib(default=None)
567
573
568 def __init__(
574 def __init__(
569 self,
575 self,
570 revlog_type,
576 revlog_type,
571 path_prefix,
577 path_prefix,
572 target_id,
578 target_id,
573 details,
579 details,
574 ):
580 ):
575 super().__init__()
581 super().__init__()
576 self.revlog_type = revlog_type
582 self.revlog_type = revlog_type
577 self.target_id = target_id
583 self.target_id = target_id
578 self._path_prefix = path_prefix
584 self._path_prefix = path_prefix
579 assert b'.i' in details, (path_prefix, details)
585 assert b'.i' in details, (path_prefix, details)
580 self._details = details
586 self._details = details
581 self._files = None
587 self._files = None
582
588
583 @property
589 @property
584 def is_changelog(self):
590 def is_changelog(self):
585 return self.revlog_type & FILEFLAGS_CHANGELOG
591 return self.revlog_type & FILEFLAGS_CHANGELOG
586
592
587 @property
593 @property
588 def is_manifestlog(self):
594 def is_manifestlog(self):
589 return self.revlog_type & FILEFLAGS_MANIFESTLOG
595 return self.revlog_type & FILEFLAGS_MANIFESTLOG
590
596
591 @property
597 @property
592 def is_filelog(self):
598 def is_filelog(self):
593 return self.revlog_type & FILEFLAGS_FILELOG
599 return self.revlog_type & FILEFLAGS_FILELOG
594
600
595 def main_file_path(self):
601 def main_file_path(self):
596 """unencoded path of the main revlog file"""
602 """unencoded path of the main revlog file"""
597 return self._path_prefix + b'.i'
603 return self._path_prefix + b'.i'
598
604
599 def files(self) -> List[StoreFile]:
605 def files(self) -> List[StoreFile]:
600 if self._files is None:
606 if self._files is None:
601 self._files = []
607 self._files = []
602 for ext in sorted(self._details, key=_ext_key):
608 for ext in sorted(self._details, key=_ext_key):
603 path = self._path_prefix + ext
609 path = self._path_prefix + ext
604 data = self._details[ext]
610 data = self._details[ext]
605 self._files.append(StoreFile(unencoded_path=path, **data))
611 self._files.append(StoreFile(unencoded_path=path, **data))
606 return self._files
612 return self._files
607
613
614 def get_streams(
615 self,
616 repo=None,
617 vfs=None,
618 copies=None,
619 max_changeset=None,
620 ):
621 if repo is None or max_changeset is None:
622 return super().get_streams(
623 repo=repo,
624 vfs=vfs,
625 copies=copies,
626 max_changeset=max_changeset,
627 )
628 if any(k.endswith(b'.idx') for k in self._details.keys()):
629 # This uses revlog-v2; ignore it for now
630 return super().get_streams(
631 repo=repo,
632 vfs=vfs,
633 copies=copies,
634 max_changeset=max_changeset,
635 )
636 name_to_ext = {}
637 for ext in self._details.keys():
638 name_to_ext[self._path_prefix + ext] = ext
639 name_to_size = {}
640 for f in self.files():
641 name_to_size[f.unencoded_path] = f.file_size(None)
642 stream = [
643 f.get_stream(vfs, copies)
644 for f in self.files()
645 if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
646 ]
647
648 rl = self.get_revlog_instance(repo).get_revlog()
649 rl_stream = rl.get_streams(max_changeset)
650 for name, s, size in rl_stream:
651 if name_to_size.get(name, 0) != size:
652 msg = _(b"expected %d bytes but %d provided for %s")
653 msg %= name_to_size.get(name, 0), size, name
654 raise error.Abort(msg)
655 stream.extend(rl_stream)
656 files = self.files()
657 assert len(stream) == len(files), (
658 stream,
659 files,
660 self._path_prefix,
661 self.target_id,
662 )
663 return stream
664
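The added method serves every companion file except `.d`/`.i` straight from the vfs, asks the revlog instance itself for the `.d`/`.i` streams (capped at `max_changeset`), and then cross-checks that the revlog reported the sizes announced earlier. A minimal sketch of that consistency check on plain data; the names and sizes are made up:

def check_stream_sizes(expected, streams):
    # sizes were advertised to the client before streaming began, so
    # any mismatch here would desynchronize the stream protocol
    for name, _content, size in streams:
        if expected.get(name, 0) != size:
            raise ValueError(
                'expected %d bytes but %d provided for %s'
                % (expected.get(name, 0), size, name)
            )

expected = {b'data/foo.i': 64, b'data/foo.d': 1024}
streams = [
    (b'data/foo.i', iter([b'\0' * 64]), 64),
    (b'data/foo.d', iter([b'\0' * 1024]), 1024),
]
check_stream_sizes(expected, streams)  # passes silently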
608 def get_revlog_instance(self, repo):
665 def get_revlog_instance(self, repo):
609 """Obtain a revlog instance from this store entry
666 """Obtain a revlog instance from this store entry
610
667
611 An instance of the appropriate class is returned.
668 An instance of the appropriate class is returned.
612 """
669 """
613 if self.is_changelog:
670 if self.is_changelog:
614 return changelog.changelog(repo.svfs)
671 return changelog.changelog(repo.svfs)
615 elif self.is_manifestlog:
672 elif self.is_manifestlog:
616 mandir = self.target_id
673 mandir = self.target_id
617 return manifest.manifestrevlog(
674 return manifest.manifestrevlog(
618 repo.nodeconstants, repo.svfs, tree=mandir
675 repo.nodeconstants, repo.svfs, tree=mandir
619 )
676 )
620 else:
677 else:
621 return filelog.filelog(repo.svfs, self.target_id)
678 return filelog.filelog(repo.svfs, self.target_id)
622
679
623
680
624 def _gather_revlog(files_data):
681 def _gather_revlog(files_data):
625 """group files per revlog prefix
682 """group files per revlog prefix
626
683
627 This returns a two-level nested dict. The top-level key is the revlog prefix
684 This returns a two-level nested dict. The top-level key is the revlog prefix
628 without extension; the second level maps every file "suffix" seen for this
685 without extension; the second level maps every file "suffix" seen for this
629 revlog to arbitrary file data.
686 revlog to arbitrary file data.
630 """
687 """
631 revlogs = collections.defaultdict(dict)
688 revlogs = collections.defaultdict(dict)
632 for u, value in files_data:
689 for u, value in files_data:
633 name, ext = _split_revlog_ext(u)
690 name, ext = _split_revlog_ext(u)
634 revlogs[name][ext] = value
691 revlogs[name][ext] = value
635 return sorted(revlogs.items())
692 return sorted(revlogs.items())
636
693
637
694
638 def _split_revlog_ext(filename):
695 def _split_revlog_ext(filename):
639 """split the revlog file prefix from the variable extension"""
696 """split the revlog file prefix from the variable extension"""
640 if filename.endswith(REVLOG_FILES_LONG_EXT):
697 if filename.endswith(REVLOG_FILES_LONG_EXT):
641 char = b'-'
698 char = b'-'
642 else:
699 else:
643 char = b'.'
700 char = b'.'
644 idx = filename.rfind(char)
701 idx = filename.rfind(char)
645 return filename[:idx], filename[idx:]
702 return filename[:idx], filename[idx:]
646
703
647
704
648 def _ext_key(ext):
705 def _ext_key(ext):
649 """a key to order revlog suffix
706 """a key to order revlog suffix
650
707
651 important to issue .i after other entry."""
708 important to issue .i after other entry."""
652 # the only important part of this order is to keep the `.i` last.
709 # the only important part of this order is to keep the `.i` last.
653 if ext.endswith(b'.n'):
710 if ext.endswith(b'.n'):
654 return (0, ext)
711 return (0, ext)
655 elif ext.endswith(b'.nd'):
712 elif ext.endswith(b'.nd'):
656 return (10, ext)
713 return (10, ext)
657 elif ext.endswith(b'.d'):
714 elif ext.endswith(b'.d'):
658 return (20, ext)
715 return (20, ext)
659 elif ext.endswith(b'.i'):
716 elif ext.endswith(b'.i'):
660 return (50, ext)
717 return (50, ext)
661 else:
718 else:
662 return (40, ext)
719 return (40, ext)
663
720
664
721
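Sorting suffixes with `_ext_key` guarantees a revlog's index is emitted after its data files, which matters when streaming: once a client holds the `.i`, it assumes the data it references is already complete. A standalone copy of the key function showing the resulting order:

def ext_key(ext):
    # keep `.i` last; nodemap files (`.n`, `.nd`) first
    if ext.endswith(b'.n'):
        return (0, ext)
    elif ext.endswith(b'.nd'):
        return (10, ext)
    elif ext.endswith(b'.d'):
        return (20, ext)
    elif ext.endswith(b'.i'):
        return (50, ext)
    else:
        return (40, ext)

print(sorted([b'.i', b'.d', b'.nd', b'.n', b'.sda'], key=ext_key))
# -> [b'.n', b'.nd', b'.d', b'.sda', b'.i']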
665 class basicstore:
722 class basicstore:
666 '''base class for local repository stores'''
723 '''base class for local repository stores'''
667
724
668 def __init__(self, path, vfstype):
725 def __init__(self, path, vfstype):
669 vfs = vfstype(path)
726 vfs = vfstype(path)
670 self.path = vfs.base
727 self.path = vfs.base
671 self.createmode = _calcmode(vfs)
728 self.createmode = _calcmode(vfs)
672 vfs.createmode = self.createmode
729 vfs.createmode = self.createmode
673 self.rawvfs = vfs
730 self.rawvfs = vfs
674 self.vfs = vfsmod.filtervfs(vfs, encodedir)
731 self.vfs = vfsmod.filtervfs(vfs, encodedir)
675 self.opener = self.vfs
732 self.opener = self.vfs
676
733
677 def join(self, f):
734 def join(self, f):
678 return self.path + b'/' + encodedir(f)
735 return self.path + b'/' + encodedir(f)
679
736
680 def _walk(self, relpath, recurse, undecodable=None):
737 def _walk(self, relpath, recurse, undecodable=None):
681 '''yields (revlog_type, unencoded, size)'''
738 '''yields (revlog_type, unencoded, size)'''
682 path = self.path
739 path = self.path
683 if relpath:
740 if relpath:
684 path += b'/' + relpath
741 path += b'/' + relpath
685 striplen = len(self.path) + 1
742 striplen = len(self.path) + 1
686 l = []
743 l = []
687 if self.rawvfs.isdir(path):
744 if self.rawvfs.isdir(path):
688 visit = [path]
745 visit = [path]
689 readdir = self.rawvfs.readdir
746 readdir = self.rawvfs.readdir
690 while visit:
747 while visit:
691 p = visit.pop()
748 p = visit.pop()
692 for f, kind, st in readdir(p, stat=True):
749 for f, kind, st in readdir(p, stat=True):
693 fp = p + b'/' + f
750 fp = p + b'/' + f
694 rl_type = is_revlog(f, kind, st)
751 rl_type = is_revlog(f, kind, st)
695 if rl_type is not None:
752 if rl_type is not None:
696 n = util.pconvert(fp[striplen:])
753 n = util.pconvert(fp[striplen:])
697 l.append((decodedir(n), (rl_type, st.st_size)))
754 l.append((decodedir(n), (rl_type, st.st_size)))
698 elif kind == stat.S_IFDIR and recurse:
755 elif kind == stat.S_IFDIR and recurse:
699 visit.append(fp)
756 visit.append(fp)
700
757
701 l.sort()
758 l.sort()
702 return l
759 return l
703
760
704 def changelog(self, trypending, concurrencychecker=None):
761 def changelog(self, trypending, concurrencychecker=None):
705 return changelog.changelog(
762 return changelog.changelog(
706 self.vfs,
763 self.vfs,
707 trypending=trypending,
764 trypending=trypending,
708 concurrencychecker=concurrencychecker,
765 concurrencychecker=concurrencychecker,
709 )
766 )
710
767
711 def manifestlog(self, repo, storenarrowmatch):
768 def manifestlog(self, repo, storenarrowmatch):
712 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
769 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
713 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
770 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
714
771
715 def data_entries(
772 def data_entries(
716 self, matcher=None, undecodable=None
773 self, matcher=None, undecodable=None
717 ) -> Generator[BaseStoreEntry, None, None]:
774 ) -> Generator[BaseStoreEntry, None, None]:
718 """Like walk, but excluding the changelog and root manifest.
775 """Like walk, but excluding the changelog and root manifest.
719
776
720 When [undecodable] is None, revlog names that can't be
777 When [undecodable] is None, revlog names that can't be
721 decoded cause an exception. When it is provided, it should
778 decoded cause an exception. When it is provided, it should
722 be a list and the filenames that can't be decoded are added
779 be a list and the filenames that can't be decoded are added
723 to it instead. This is very rarely needed."""
780 to it instead. This is very rarely needed."""
724 dirs = [
781 dirs = [
725 (b'data', FILEFLAGS_FILELOG, False),
782 (b'data', FILEFLAGS_FILELOG, False),
726 (b'meta', FILEFLAGS_MANIFESTLOG, True),
783 (b'meta', FILEFLAGS_MANIFESTLOG, True),
727 ]
784 ]
728 for base_dir, rl_type, strip_filename in dirs:
785 for base_dir, rl_type, strip_filename in dirs:
729 files = self._walk(base_dir, True, undecodable=undecodable)
786 files = self._walk(base_dir, True, undecodable=undecodable)
730 files = (f for f in files if f[1][0] is not None)
787 files = (f for f in files if f[1][0] is not None)
731 for revlog, details in _gather_revlog(files):
788 for revlog, details in _gather_revlog(files):
732 file_details = {}
789 file_details = {}
733 revlog_target_id = revlog.split(b'/', 1)[1]
790 revlog_target_id = revlog.split(b'/', 1)[1]
734 if strip_filename and b'/' in revlog:
791 if strip_filename and b'/' in revlog:
735 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
792 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
736 revlog_target_id += b'/'
793 revlog_target_id += b'/'
737 for ext, (t, s) in sorted(details.items()):
794 for ext, (t, s) in sorted(details.items()):
738 file_details[ext] = {
795 file_details[ext] = {
739 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
796 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
740 'file_size': s,
797 'file_size': s,
741 }
798 }
742 yield RevlogStoreEntry(
799 yield RevlogStoreEntry(
743 path_prefix=revlog,
800 path_prefix=revlog,
744 revlog_type=rl_type,
801 revlog_type=rl_type,
745 target_id=revlog_target_id,
802 target_id=revlog_target_id,
746 details=file_details,
803 details=file_details,
747 )
804 )
748
805
749 def top_entries(
806 def top_entries(
750 self, phase=False, obsolescence=False
807 self, phase=False, obsolescence=False
751 ) -> Generator[BaseStoreEntry, None, None]:
808 ) -> Generator[BaseStoreEntry, None, None]:
752 if phase and self.vfs.exists(b'phaseroots'):
809 if phase and self.vfs.exists(b'phaseroots'):
753 yield SimpleStoreEntry(
810 yield SimpleStoreEntry(
754 entry_path=b'phaseroots',
811 entry_path=b'phaseroots',
755 is_volatile=True,
812 is_volatile=True,
756 )
813 )
757
814
758 if obsolescence and self.vfs.exists(b'obsstore'):
815 if obsolescence and self.vfs.exists(b'obsstore'):
759 # XXX if we had the file size it could be non-volatile
816 # XXX if we had the file size it could be non-volatile
760 yield SimpleStoreEntry(
817 yield SimpleStoreEntry(
761 entry_path=b'obsstore',
818 entry_path=b'obsstore',
762 is_volatile=True,
819 is_volatile=True,
763 )
820 )
764
821
765 files = reversed(self._walk(b'', False))
822 files = reversed(self._walk(b'', False))
766
823
767 changelogs = collections.defaultdict(dict)
824 changelogs = collections.defaultdict(dict)
768 manifestlogs = collections.defaultdict(dict)
825 manifestlogs = collections.defaultdict(dict)
769
826
770 for u, (t, s) in files:
827 for u, (t, s) in files:
771 if u.startswith(b'00changelog'):
828 if u.startswith(b'00changelog'):
772 name, ext = _split_revlog_ext(u)
829 name, ext = _split_revlog_ext(u)
773 changelogs[name][ext] = (t, s)
830 changelogs[name][ext] = (t, s)
774 elif u.startswith(b'00manifest'):
831 elif u.startswith(b'00manifest'):
775 name, ext = _split_revlog_ext(u)
832 name, ext = _split_revlog_ext(u)
776 manifestlogs[name][ext] = (t, s)
833 manifestlogs[name][ext] = (t, s)
777 else:
834 else:
778 yield SimpleStoreEntry(
835 yield SimpleStoreEntry(
779 entry_path=u,
836 entry_path=u,
780 is_volatile=bool(t & FILEFLAGS_VOLATILE),
837 is_volatile=bool(t & FILEFLAGS_VOLATILE),
781 file_size=s,
838 file_size=s,
782 )
839 )
783 # yield manifest before changelog
840 # yield manifest before changelog
784 top_rl = [
841 top_rl = [
785 (manifestlogs, FILEFLAGS_MANIFESTLOG),
842 (manifestlogs, FILEFLAGS_MANIFESTLOG),
786 (changelogs, FILEFLAGS_CHANGELOG),
843 (changelogs, FILEFLAGS_CHANGELOG),
787 ]
844 ]
788 assert len(manifestlogs) <= 1
845 assert len(manifestlogs) <= 1
789 assert len(changelogs) <= 1
846 assert len(changelogs) <= 1
790 for data, revlog_type in top_rl:
847 for data, revlog_type in top_rl:
791 for revlog, details in sorted(data.items()):
848 for revlog, details in sorted(data.items()):
792 file_details = {}
849 file_details = {}
793 for ext, (t, s) in details.items():
850 for ext, (t, s) in details.items():
794 file_details[ext] = {
851 file_details[ext] = {
795 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
852 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
796 'file_size': s,
853 'file_size': s,
797 }
854 }
798 yield RevlogStoreEntry(
855 yield RevlogStoreEntry(
799 path_prefix=revlog,
856 path_prefix=revlog,
800 revlog_type=revlog_type,
857 revlog_type=revlog_type,
801 target_id=b'',
858 target_id=b'',
802 details=file_details,
859 details=file_details,
803 )
860 )
804
861
805 def walk(
862 def walk(
806 self, matcher=None, phase=False, obsolescence=False
863 self, matcher=None, phase=False, obsolescence=False
807 ) -> Generator[BaseStoreEntry, None, None]:
864 ) -> Generator[BaseStoreEntry, None, None]:
808 """return files related to data storage (ie: revlogs)
865 """return files related to data storage (ie: revlogs)
809
866
810 yields instance from BaseStoreEntry subclasses
867 yields instance from BaseStoreEntry subclasses
811
868
812 if a matcher is passed, storage files of only those tracked paths
869 if a matcher is passed, storage files of only those tracked paths
813 are passed with matches the matcher
870 are passed with matches the matcher
814 """
871 """
815 # yield data files first
872 # yield data files first
816 for x in self.data_entries(matcher):
873 for x in self.data_entries(matcher):
817 yield x
874 yield x
818 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
875 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
819 yield x
876 yield x
820
877
821 def copylist(self):
878 def copylist(self):
822 return _data
879 return _data
823
880
824 def write(self, tr):
881 def write(self, tr):
825 pass
882 pass
826
883
827 def invalidatecaches(self):
884 def invalidatecaches(self):
828 pass
885 pass
829
886
830 def markremoved(self, fn):
887 def markremoved(self, fn):
831 pass
888 pass
832
889
833 def __contains__(self, path):
890 def __contains__(self, path):
834 '''Checks if the store contains path'''
891 '''Checks if the store contains path'''
835 path = b"/".join((b"data", path))
892 path = b"/".join((b"data", path))
836 # file?
893 # file?
837 if self.vfs.exists(path + b".i"):
894 if self.vfs.exists(path + b".i"):
838 return True
895 return True
839 # dir?
896 # dir?
840 if not path.endswith(b"/"):
897 if not path.endswith(b"/"):
841 path = path + b"/"
898 path = path + b"/"
842 return self.vfs.exists(path)
899 return self.vfs.exists(path)
843
900
844
901
845 class encodedstore(basicstore):
902 class encodedstore(basicstore):
846 def __init__(self, path, vfstype):
903 def __init__(self, path, vfstype):
847 vfs = vfstype(path + b'/store')
904 vfs = vfstype(path + b'/store')
848 self.path = vfs.base
905 self.path = vfs.base
849 self.createmode = _calcmode(vfs)
906 self.createmode = _calcmode(vfs)
850 vfs.createmode = self.createmode
907 vfs.createmode = self.createmode
851 self.rawvfs = vfs
908 self.rawvfs = vfs
852 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
909 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
853 self.opener = self.vfs
910 self.opener = self.vfs
854
911
855 def _walk(self, relpath, recurse, undecodable=None):
912 def _walk(self, relpath, recurse, undecodable=None):
856 old = super()._walk(relpath, recurse)
913 old = super()._walk(relpath, recurse)
857 new = []
914 new = []
858 for f1, value in old:
915 for f1, value in old:
859 try:
916 try:
860 f2 = decodefilename(f1)
917 f2 = decodefilename(f1)
861 except KeyError:
918 except KeyError:
862 if undecodable is None:
919 if undecodable is None:
863 msg = _(b'undecodable revlog name %s') % f1
920 msg = _(b'undecodable revlog name %s') % f1
864 raise error.StorageError(msg)
921 raise error.StorageError(msg)
865 else:
922 else:
866 undecodable.append(f1)
923 undecodable.append(f1)
867 continue
924 continue
868 new.append((f2, value))
925 new.append((f2, value))
869 return new
926 return new
870
927
871 def data_entries(
928 def data_entries(
872 self, matcher=None, undecodable=None
929 self, matcher=None, undecodable=None
873 ) -> Generator[BaseStoreEntry, None, None]:
930 ) -> Generator[BaseStoreEntry, None, None]:
874 entries = super(encodedstore, self).data_entries(
931 entries = super(encodedstore, self).data_entries(
875 undecodable=undecodable
932 undecodable=undecodable
876 )
933 )
877 for entry in entries:
934 for entry in entries:
878 if _match_tracked_entry(entry, matcher):
935 if _match_tracked_entry(entry, matcher):
879 yield entry
936 yield entry
880
937
881 def join(self, f):
938 def join(self, f):
882 return self.path + b'/' + encodefilename(f)
939 return self.path + b'/' + encodefilename(f)
883
940
884 def copylist(self):
941 def copylist(self):
885 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
942 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
886
943
887
944
888 class fncache:
945 class fncache:
889 # the filename used to be partially encoded
946 # the filename used to be partially encoded
890 # hence the encodedir/decodedir dance
947 # hence the encodedir/decodedir dance
891 def __init__(self, vfs):
948 def __init__(self, vfs):
892 self.vfs = vfs
949 self.vfs = vfs
893 self._ignores = set()
950 self._ignores = set()
894 self.entries = None
951 self.entries = None
895 self._dirty = False
952 self._dirty = False
896 # set of new additions to fncache
953 # set of new additions to fncache
897 self.addls = set()
954 self.addls = set()
898
955
899 def ensureloaded(self, warn=None):
956 def ensureloaded(self, warn=None):
900 """read the fncache file if not already read.
957 """read the fncache file if not already read.
901
958
902 If the file on disk is corrupted, raise. If warn is provided,
959 If the file on disk is corrupted, raise. If warn is provided,
903 warn and keep going instead."""
960 warn and keep going instead."""
904 if self.entries is None:
961 if self.entries is None:
905 self._load(warn)
962 self._load(warn)
906
963
907 def _load(self, warn=None):
964 def _load(self, warn=None):
908 '''fill the entries from the fncache file'''
965 '''fill the entries from the fncache file'''
909 self._dirty = False
966 self._dirty = False
910 try:
967 try:
911 fp = self.vfs(b'fncache', mode=b'rb')
968 fp = self.vfs(b'fncache', mode=b'rb')
912 except IOError:
969 except IOError:
913 # skip nonexistent file
970 # skip nonexistent file
914 self.entries = set()
971 self.entries = set()
915 return
972 return
916
973
917 self.entries = set()
974 self.entries = set()
918 chunk = b''
975 chunk = b''
919 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
976 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
920 chunk += c
977 chunk += c
921 try:
978 try:
922 p = chunk.rindex(b'\n')
979 p = chunk.rindex(b'\n')
923 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
980 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
924 chunk = chunk[p + 1 :]
981 chunk = chunk[p + 1 :]
925 except ValueError:
982 except ValueError:
926 # substring '\n' not found, maybe the entry is bigger than the
983 # substring '\n' not found, maybe the entry is bigger than the
927 # chunksize, so let's keep iterating
984 # chunksize, so let's keep iterating
928 pass
985 pass
929
986
930 if chunk:
987 if chunk:
931 msg = _(b"fncache does not end with a newline")
988 msg = _(b"fncache does not end with a newline")
932 if warn:
989 if warn:
933 warn(msg + b'\n')
990 warn(msg + b'\n')
934 else:
991 else:
935 raise error.Abort(
992 raise error.Abort(
936 msg,
993 msg,
937 hint=_(
994 hint=_(
938 b"use 'hg debugrebuildfncache' to "
995 b"use 'hg debugrebuildfncache' to "
939 b"rebuild the fncache"
996 b"rebuild the fncache"
940 ),
997 ),
941 )
998 )
942 self._checkentries(fp, warn)
999 self._checkentries(fp, warn)
943 fp.close()
1000 fp.close()
944
1001
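`_load` reads the fncache in fixed-size chunks and only splits on the last newline seen so far, carrying any partial tail over to the next iteration; a leftover tail at the end means the file lacked a final newline. A standalone sketch of that chunked parser, run against an in-memory buffer:

import functools
import io

def read_entries_chunked(fp, chunksize=4):
    entries = set()
    chunk = b''
    for c in iter(functools.partial(fp.read, chunksize), b''):
        chunk += c
        try:
            p = chunk.rindex(b'\n')
            entries.update(chunk[: p + 1].splitlines())
            chunk = chunk[p + 1 :]
        except ValueError:
            pass  # no newline yet; the entry spans several chunks
    return entries, chunk  # non-empty chunk => missing final newline

entries, tail = read_entries_chunked(io.BytesIO(b'data/a.i\ndata/bb.i\n'))
print(sorted(entries), tail)  # -> [b'data/a.i', b'data/bb.i'] b''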
945 def _checkentries(self, fp, warn):
1002 def _checkentries(self, fp, warn):
946 """make sure there is no empty string in entries"""
1003 """make sure there is no empty string in entries"""
947 if b'' in self.entries:
1004 if b'' in self.entries:
948 fp.seek(0)
1005 fp.seek(0)
949 for n, line in enumerate(fp):
1006 for n, line in enumerate(fp):
950 if not line.rstrip(b'\n'):
1007 if not line.rstrip(b'\n'):
951 t = _(b'invalid entry in fncache, line %d') % (n + 1)
1008 t = _(b'invalid entry in fncache, line %d') % (n + 1)
952 if warn:
1009 if warn:
953 warn(t + b'\n')
1010 warn(t + b'\n')
954 else:
1011 else:
955 raise error.Abort(t)
1012 raise error.Abort(t)
956
1013
957 def write(self, tr):
1014 def write(self, tr):
958 if self._dirty:
1015 if self._dirty:
959 assert self.entries is not None
1016 assert self.entries is not None
960 self.entries = self.entries | self.addls
1017 self.entries = self.entries | self.addls
961 self.addls = set()
1018 self.addls = set()
962 tr.addbackup(b'fncache')
1019 tr.addbackup(b'fncache')
963 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
1020 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
964 if self.entries:
1021 if self.entries:
965 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
1022 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
966 fp.close()
1023 fp.close()
967 self._dirty = False
1024 self._dirty = False
968 if self.addls:
1025 if self.addls:
969 # if we have just new entries, let's append them to the fncache
1026 # if we have just new entries, let's append them to the fncache
970 tr.addbackup(b'fncache')
1027 tr.addbackup(b'fncache')
971 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
1028 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
972 if self.addls:
1029 if self.addls:
973 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
1030 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
974 fp.close()
1031 fp.close()
975 self.entries = None
1032 self.entries = None
976 self.addls = set()
1033 self.addls = set()
977
1034
978 def addignore(self, fn):
1035 def addignore(self, fn):
979 self._ignores.add(fn)
1036 self._ignores.add(fn)
980
1037
981 def add(self, fn):
1038 def add(self, fn):
982 if fn in self._ignores:
1039 if fn in self._ignores:
983 return
1040 return
984 if self.entries is None:
1041 if self.entries is None:
985 self._load()
1042 self._load()
986 if fn not in self.entries:
1043 if fn not in self.entries:
987 self.addls.add(fn)
1044 self.addls.add(fn)
988
1045
989 def remove(self, fn):
1046 def remove(self, fn):
990 if self.entries is None:
1047 if self.entries is None:
991 self._load()
1048 self._load()
992 if fn in self.addls:
1049 if fn in self.addls:
993 self.addls.remove(fn)
1050 self.addls.remove(fn)
994 return
1051 return
995 try:
1052 try:
996 self.entries.remove(fn)
1053 self.entries.remove(fn)
997 self._dirty = True
1054 self._dirty = True
998 except KeyError:
1055 except KeyError:
999 pass
1056 pass
1000
1057
1001 def __contains__(self, fn):
1058 def __contains__(self, fn):
1002 if fn in self.addls:
1059 if fn in self.addls:
1003 return True
1060 return True
1004 if self.entries is None:
1061 if self.entries is None:
1005 self._load()
1062 self._load()
1006 return fn in self.entries
1063 return fn in self.entries
1007
1064
1008 def __iter__(self):
1065 def __iter__(self):
1009 if self.entries is None:
1066 if self.entries is None:
1010 self._load()
1067 self._load()
1011 return iter(self.entries | self.addls)
1068 return iter(self.entries | self.addls)
1012
1069
1013
1070
1014 class _fncachevfs(vfsmod.proxyvfs):
1071 class _fncachevfs(vfsmod.proxyvfs):
1015 def __init__(self, vfs, fnc, encode):
1072 def __init__(self, vfs, fnc, encode):
1016 vfsmod.proxyvfs.__init__(self, vfs)
1073 vfsmod.proxyvfs.__init__(self, vfs)
1017 self.fncache = fnc
1074 self.fncache = fnc
1018 self.encode = encode
1075 self.encode = encode
1019
1076
1020 def __call__(self, path, mode=b'r', *args, **kw):
1077 def __call__(self, path, mode=b'r', *args, **kw):
1021 encoded = self.encode(path)
1078 encoded = self.encode(path)
1022 if (
1079 if (
1023 mode not in (b'r', b'rb')
1080 mode not in (b'r', b'rb')
1024 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1081 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1025 and revlog_type(path) is not None
1082 and revlog_type(path) is not None
1026 ):
1083 ):
1027 # do not trigger a fncache load when adding a file that already is
1084 # do not trigger a fncache load when adding a file that already is
1028 # known to exist.
1085 # known to exist.
1029 notload = self.fncache.entries is None and self.vfs.exists(encoded)
1086 notload = self.fncache.entries is None and self.vfs.exists(encoded)
1030 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
1087 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
1031 # when appending to an existing file, if the file has size zero,
1088 # when appending to an existing file, if the file has size zero,
1032 # it should be considered as missing. Such zero-size files are
1089 # it should be considered as missing. Such zero-size files are
1033 # the result of truncation when a transaction is aborted.
1090 # the result of truncation when a transaction is aborted.
1034 notload = False
1091 notload = False
1035 if not notload:
1092 if not notload:
1036 self.fncache.add(path)
1093 self.fncache.add(path)
1037 return self.vfs(encoded, mode, *args, **kw)
1094 return self.vfs(encoded, mode, *args, **kw)
1038
1095
1039 def join(self, path):
1096 def join(self, path):
1040 if path:
1097 if path:
1041 return self.vfs.join(self.encode(path))
1098 return self.vfs.join(self.encode(path))
1042 else:
1099 else:
1043 return self.vfs.join(path)
1100 return self.vfs.join(path)
1044
1101
1045 def register_file(self, path):
1102 def register_file(self, path):
1046 """generic hook point to lets fncache steer its stew"""
1103 """generic hook point to lets fncache steer its stew"""
1047 if path.startswith(b'data/') or path.startswith(b'meta/'):
1104 if path.startswith(b'data/') or path.startswith(b'meta/'):
1048 self.fncache.add(path)
1105 self.fncache.add(path)
1049
1106
1050
1107
1051 class fncachestore(basicstore):
1108 class fncachestore(basicstore):
1052 def __init__(self, path, vfstype, dotencode):
1109 def __init__(self, path, vfstype, dotencode):
1053 if dotencode:
1110 if dotencode:
1054 encode = _pathencode
1111 encode = _pathencode
1055 else:
1112 else:
1056 encode = _plainhybridencode
1113 encode = _plainhybridencode
1057 self.encode = encode
1114 self.encode = encode
1058 vfs = vfstype(path + b'/store')
1115 vfs = vfstype(path + b'/store')
1059 self.path = vfs.base
1116 self.path = vfs.base
1060 self.pathsep = self.path + b'/'
1117 self.pathsep = self.path + b'/'
1061 self.createmode = _calcmode(vfs)
1118 self.createmode = _calcmode(vfs)
1062 vfs.createmode = self.createmode
1119 vfs.createmode = self.createmode
1063 self.rawvfs = vfs
1120 self.rawvfs = vfs
1064 fnc = fncache(vfs)
1121 fnc = fncache(vfs)
1065 self.fncache = fnc
1122 self.fncache = fnc
1066 self.vfs = _fncachevfs(vfs, fnc, encode)
1123 self.vfs = _fncachevfs(vfs, fnc, encode)
1067 self.opener = self.vfs
1124 self.opener = self.vfs
1068
1125
1069 def join(self, f):
1126 def join(self, f):
1070 return self.pathsep + self.encode(f)
1127 return self.pathsep + self.encode(f)
1071
1128
1072 def getsize(self, path):
1129 def getsize(self, path):
1073 return self.rawvfs.stat(path).st_size
1130 return self.rawvfs.stat(path).st_size
1074
1131
1075 def data_entries(
1132 def data_entries(
1076 self, matcher=None, undecodable=None
1133 self, matcher=None, undecodable=None
1077 ) -> Generator[BaseStoreEntry, None, None]:
1134 ) -> Generator[BaseStoreEntry, None, None]:
1078 files = ((f, revlog_type(f)) for f in self.fncache)
1135 files = ((f, revlog_type(f)) for f in self.fncache)
1079 # Note: all files in fncache should be revlog related. However, the
1136 # Note: all files in fncache should be revlog related. However, the
1080 # fncache might contain files added by previous versions of
1137 # fncache might contain files added by previous versions of
1081 # Mercurial.
1138 # Mercurial.
1082 files = (f for f in files if f[1] is not None)
1139 files = (f for f in files if f[1] is not None)
1083 by_revlog = _gather_revlog(files)
1140 by_revlog = _gather_revlog(files)
1084 for revlog, details in by_revlog:
1141 for revlog, details in by_revlog:
1085 file_details = {}
1142 file_details = {}
1086 if revlog.startswith(b'data/'):
1143 if revlog.startswith(b'data/'):
1087 rl_type = FILEFLAGS_FILELOG
1144 rl_type = FILEFLAGS_FILELOG
1088 revlog_target_id = revlog.split(b'/', 1)[1]
1145 revlog_target_id = revlog.split(b'/', 1)[1]
1089 elif revlog.startswith(b'meta/'):
1146 elif revlog.startswith(b'meta/'):
1090 rl_type = FILEFLAGS_MANIFESTLOG
1147 rl_type = FILEFLAGS_MANIFESTLOG
1091 # drop the initial directory and the `00manifest` file part
1148 # drop the initial directory and the `00manifest` file part
1092 tmp = revlog.split(b'/', 1)[1]
1149 tmp = revlog.split(b'/', 1)[1]
1093 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1150 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1094 else:
1151 else:
1095 # unreachable
1152 # unreachable
1096 assert False, revlog
1153 assert False, revlog
1097 for ext, t in details.items():
1154 for ext, t in details.items():
1098 file_details[ext] = {
1155 file_details[ext] = {
1099 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1156 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1100 }
1157 }
1101 entry = RevlogStoreEntry(
1158 entry = RevlogStoreEntry(
1102 path_prefix=revlog,
1159 path_prefix=revlog,
1103 revlog_type=rl_type,
1160 revlog_type=rl_type,
1104 target_id=revlog_target_id,
1161 target_id=revlog_target_id,
1105 details=file_details,
1162 details=file_details,
1106 )
1163 )
1107 if _match_tracked_entry(entry, matcher):
1164 if _match_tracked_entry(entry, matcher):
1108 yield entry
1165 yield entry
1109
1166
1110 def copylist(self):
1167 def copylist(self):
1111 d = (
1168 d = (
1112 b'bookmarks',
1169 b'bookmarks',
1113 b'narrowspec',
1170 b'narrowspec',
1114 b'data',
1171 b'data',
1115 b'meta',
1172 b'meta',
1116 b'dh',
1173 b'dh',
1117 b'fncache',
1174 b'fncache',
1118 b'phaseroots',
1175 b'phaseroots',
1119 b'obsstore',
1176 b'obsstore',
1120 b'00manifest.d',
1177 b'00manifest.d',
1121 b'00manifest.i',
1178 b'00manifest.i',
1122 b'00changelog.d',
1179 b'00changelog.d',
1123 b'00changelog.i',
1180 b'00changelog.i',
1124 b'requires',
1181 b'requires',
1125 )
1182 )
1126 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1183 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1127
1184
1128 def write(self, tr):
1185 def write(self, tr):
1129 self.fncache.write(tr)
1186 self.fncache.write(tr)
1130
1187
1131 def invalidatecaches(self):
1188 def invalidatecaches(self):
1132 self.fncache.entries = None
1189 self.fncache.entries = None
1133 self.fncache.addls = set()
1190 self.fncache.addls = set()
1134
1191
1135 def markremoved(self, fn):
1192 def markremoved(self, fn):
1136 self.fncache.remove(fn)
1193 self.fncache.remove(fn)
1137
1194
1138 def _exists(self, f):
1195 def _exists(self, f):
1139 ef = self.encode(f)
1196 ef = self.encode(f)
1140 try:
1197 try:
1141 self.getsize(ef)
1198 self.getsize(ef)
1142 return True
1199 return True
1143 except FileNotFoundError:
1200 except FileNotFoundError:
1144 return False
1201 return False
1145
1202
1146 def __contains__(self, path):
1203 def __contains__(self, path):
1147 '''Checks if the store contains path'''
1204 '''Checks if the store contains path'''
1148 path = b"/".join((b"data", path))
1205 path = b"/".join((b"data", path))
1149 # check for files (exact match)
1206 # check for files (exact match)
1150 e = path + b'.i'
1207 e = path + b'.i'
1151 if e in self.fncache and self._exists(e):
1208 if e in self.fncache and self._exists(e):
1152 return True
1209 return True
1153 # now check for directories (prefix match)
1210 # now check for directories (prefix match)
1154 if not path.endswith(b'/'):
1211 if not path.endswith(b'/'):
1155 path += b'/'
1212 path += b'/'
1156 for e in self.fncache:
1213 for e in self.fncache:
1157 if e.startswith(path) and self._exists(e):
1214 if e.startswith(path) and self._exists(e):
1158 return True
1215 return True
1159 return False
1216 return False
@@ -1,966 +1,970 b''
1 # streamclone.py - producing and consuming streaming repository data
1 # streamclone.py - producing and consuming streaming repository data
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import contextlib
9 import contextlib
10 import os
10 import os
11 import struct
11 import struct
12
12
13 from .i18n import _
13 from .i18n import _
14 from .interfaces import repository
14 from .interfaces import repository
15 from . import (
15 from . import (
16 bookmarks,
16 bookmarks,
17 bundle2 as bundle2mod,
17 bundle2 as bundle2mod,
18 cacheutil,
18 cacheutil,
19 error,
19 error,
20 narrowspec,
20 narrowspec,
21 phases,
21 phases,
22 pycompat,
22 pycompat,
23 requirements as requirementsmod,
23 requirements as requirementsmod,
24 scmutil,
24 scmutil,
25 store,
25 store,
26 transaction,
26 transaction,
27 util,
27 util,
28 )
28 )
29 from .revlogutils import (
29 from .revlogutils import (
30 nodemap,
30 nodemap,
31 )
31 )
32
32
33
33
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
35 """determine the final set of requirement for a new stream clone
35 """determine the final set of requirement for a new stream clone
36
36
37 this method combines the "default" requirements that a new repository would
37 this method combines the "default" requirements that a new repository would
38 use with the constraints we get from the stream clone content. We keep local
38 use with the constraints we get from the stream clone content. We keep local
39 configuration choices when possible.
39 configuration choices when possible.
40 """
40 """
41 requirements = set(default_requirements)
41 requirements = set(default_requirements)
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
43 requirements.update(streamed_requirements)
43 requirements.update(streamed_requirements)
44 return requirements
44 return requirements
45
45
46
46
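
As a quick illustration of the set arithmetic above, here is a minimal, self-contained sketch; the requirement names are made up for the example and are not the actual value of STREAM_FIXED_REQUIREMENTS.

# Illustrative only: stand-in values for the real requirement constants.
default = {b'dotencode', b'fncache', b'store', b'revlogv1'}
fixed = {b'revlogv1'}  # format choices dictated by the stream content
streamed = {b'revlogv1', b'generaldelta'}

final = set(default)
final -= fixed       # drop local format choices the stream overrides
final |= streamed    # adopt the formats the stream actually uses
assert final == {b'dotencode', b'fncache', b'store', b'revlogv1', b'generaldelta'}
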
47 def streamed_requirements(repo):
47 def streamed_requirements(repo):
48 """the set of requirement the new clone will have to support
48 """the set of requirement the new clone will have to support
49
49
50 This is used for advertising the stream options and to generate the actual
50 This is used for advertising the stream options and to generate the actual
51 stream content."""
51 stream content."""
52 requiredformats = (
52 requiredformats = (
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
54 )
54 )
55 return requiredformats
55 return requiredformats
56
56
57
57
58 def canperformstreamclone(pullop, bundle2=False):
58 def canperformstreamclone(pullop, bundle2=False):
59 """Whether it is possible to perform a streaming clone as part of pull.
59 """Whether it is possible to perform a streaming clone as part of pull.
60
60
61 ``bundle2`` will cause the function to consider stream clone through
61 ``bundle2`` will cause the function to consider stream clone through
62 bundle2 and only through bundle2.
62 bundle2 and only through bundle2.
63
63
64 Returns a tuple of (supported, requirements). ``supported`` is True if
64 Returns a tuple of (supported, requirements). ``supported`` is True if
65 streaming clone is supported and False otherwise. ``requirements`` is
65 streaming clone is supported and False otherwise. ``requirements`` is
66 a set of repo requirements from the remote, or ``None`` if stream clone
66 a set of repo requirements from the remote, or ``None`` if stream clone
67 isn't supported.
67 isn't supported.
68 """
68 """
69 repo = pullop.repo
69 repo = pullop.repo
70 remote = pullop.remote
70 remote = pullop.remote
71
71
72 # should we consider streaming clone at all?
72 # should we consider streaming clone at all?
73 streamrequested = pullop.streamclonerequested
73 streamrequested = pullop.streamclonerequested
74 # If we don't have a preference, let the server decide for us. This
74 # If we don't have a preference, let the server decide for us. This
75 # likely only comes into play in LANs.
75 # likely only comes into play in LANs.
76 if streamrequested is None:
76 if streamrequested is None:
77 # The server can advertise whether to prefer streaming clone.
77 # The server can advertise whether to prefer streaming clone.
78 streamrequested = remote.capable(b'stream-preferred')
78 streamrequested = remote.capable(b'stream-preferred')
79 if not streamrequested:
79 if not streamrequested:
80 return False, None
80 return False, None
81
81
82 # Streaming clone only works on an empty destination repository
82 # Streaming clone only works on an empty destination repository
83 if len(repo):
83 if len(repo):
84 return False, None
84 return False, None
85
85
86 # Streaming clone only works if all data is being requested.
86 # Streaming clone only works if all data is being requested.
87 if pullop.heads:
87 if pullop.heads:
88 return False, None
88 return False, None
89
89
90 bundle2supported = False
90 bundle2supported = False
91 if pullop.canusebundle2:
91 if pullop.canusebundle2:
92 local_caps = bundle2mod.getrepocaps(repo, role=b'client')
92 local_caps = bundle2mod.getrepocaps(repo, role=b'client')
93 local_supported = set(local_caps.get(b'stream', []))
93 local_supported = set(local_caps.get(b'stream', []))
94 remote_supported = set(pullop.remotebundle2caps.get(b'stream', []))
94 remote_supported = set(pullop.remotebundle2caps.get(b'stream', []))
95 bundle2supported = bool(local_supported & remote_supported)
95 bundle2supported = bool(local_supported & remote_supported)
96 # else
96 # else
97 # Server doesn't support bundle2 stream clone or doesn't support
97 # Server doesn't support bundle2 stream clone or doesn't support
98 # the versions we support. Fall back and possibly allow legacy.
98 # the versions we support. Fall back and possibly allow legacy.
99
99
100 # Ensures legacy code path uses available bundle2.
100 # Ensures legacy code path uses available bundle2.
101 if bundle2supported and not bundle2:
101 if bundle2supported and not bundle2:
102 return False, None
102 return False, None
103 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
103 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
104 elif bundle2 and not bundle2supported:
104 elif bundle2 and not bundle2supported:
105 return False, None
105 return False, None
106
106
107 # In order for stream clone to work, the client has to support all the
107 # In order for stream clone to work, the client has to support all the
108 # requirements advertised by the server.
108 # requirements advertised by the server.
109 #
109 #
110 # The server advertises its requirements via the "stream" and "streamreqs"
110 # The server advertises its requirements via the "stream" and "streamreqs"
111 # capability. "stream" (a value-less capability) is advertised if and only
111 # capability. "stream" (a value-less capability) is advertised if and only
112 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
112 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
113 # is advertised and contains a comma-delimited list of requirements.
113 # is advertised and contains a comma-delimited list of requirements.
114 requirements = set()
114 requirements = set()
115 if remote.capable(b'stream'):
115 if remote.capable(b'stream'):
116 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
116 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
117 else:
117 else:
118 streamreqs = remote.capable(b'streamreqs')
118 streamreqs = remote.capable(b'streamreqs')
119 # This is weird and shouldn't happen with modern servers.
119 # This is weird and shouldn't happen with modern servers.
120 if not streamreqs:
120 if not streamreqs:
121 pullop.repo.ui.warn(
121 pullop.repo.ui.warn(
122 _(
122 _(
123 b'warning: stream clone requested but server has them '
123 b'warning: stream clone requested but server has them '
124 b'disabled\n'
124 b'disabled\n'
125 )
125 )
126 )
126 )
127 return False, None
127 return False, None
128
128
129 streamreqs = set(streamreqs.split(b','))
129 streamreqs = set(streamreqs.split(b','))
130 # Server requires something we don't support. Bail.
130 # Server requires something we don't support. Bail.
131 missingreqs = streamreqs - repo.supported
131 missingreqs = streamreqs - repo.supported
132 if missingreqs:
132 if missingreqs:
133 pullop.repo.ui.warn(
133 pullop.repo.ui.warn(
134 _(
134 _(
135 b'warning: stream clone requested but client is missing '
135 b'warning: stream clone requested but client is missing '
136 b'requirements: %s\n'
136 b'requirements: %s\n'
137 )
137 )
138 % b', '.join(sorted(missingreqs))
138 % b', '.join(sorted(missingreqs))
139 )
139 )
140 pullop.repo.ui.warn(
140 pullop.repo.ui.warn(
141 _(
141 _(
142 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
142 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
143 b'for more information)\n'
143 b'for more information)\n'
144 )
144 )
145 )
145 )
146 return False, None
146 return False, None
147 requirements = streamreqs
147 requirements = streamreqs
148
148
149 return True, requirements
149 return True, requirements
150
150
151
151
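
A rough sketch of the capability negotiation described above. The `caps` dict is a hypothetical stand-in for the real peer API (`remote.capable`), used here only to make the branching concrete.

# Hypothetical stand-in for remote.capable(): capability name -> value,
# True for value-less capabilities, absent key if unsupported.
def _stream_requirements(caps):
    if caps.get(b'stream'):
        # a value-less 'stream' capability implies only revlogv1
        return {b'revlogv1'}
    streamreqs = caps.get(b'streamreqs')
    if not streamreqs:
        return None  # server has stream clones disabled
    return set(streamreqs.split(b','))

assert _stream_requirements({b'stream': True}) == {b'revlogv1'}
assert _stream_requirements({b'streamreqs': b'revlogv1,generaldelta'}) == {
    b'revlogv1',
    b'generaldelta',
}
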
152 def maybeperformlegacystreamclone(pullop):
152 def maybeperformlegacystreamclone(pullop):
153 """Possibly perform a legacy stream clone operation.
153 """Possibly perform a legacy stream clone operation.
154
154
155 Legacy stream clones are performed as part of pull but before all other
155 Legacy stream clones are performed as part of pull but before all other
156 operations.
156 operations.
157
157
158 A legacy stream clone will not be performed if a bundle2 stream clone is
158 A legacy stream clone will not be performed if a bundle2 stream clone is
159 supported.
159 supported.
160 """
160 """
161 from . import localrepo
161 from . import localrepo
162
162
163 supported, requirements = canperformstreamclone(pullop)
163 supported, requirements = canperformstreamclone(pullop)
164
164
165 if not supported:
165 if not supported:
166 return
166 return
167
167
168 repo = pullop.repo
168 repo = pullop.repo
169 remote = pullop.remote
169 remote = pullop.remote
170
170
171 # Save remote branchmap. We will use it later to speed up branchcache
171 # Save remote branchmap. We will use it later to speed up branchcache
172 # creation.
172 # creation.
173 rbranchmap = None
173 rbranchmap = None
174 if remote.capable(b'branchmap'):
174 if remote.capable(b'branchmap'):
175 with remote.commandexecutor() as e:
175 with remote.commandexecutor() as e:
176 rbranchmap = e.callcommand(b'branchmap', {}).result()
176 rbranchmap = e.callcommand(b'branchmap', {}).result()
177
177
178 repo.ui.status(_(b'streaming all changes\n'))
178 repo.ui.status(_(b'streaming all changes\n'))
179
179
180 with remote.commandexecutor() as e:
180 with remote.commandexecutor() as e:
181 fp = e.callcommand(b'stream_out', {}).result()
181 fp = e.callcommand(b'stream_out', {}).result()
182
182
183 # TODO strictly speaking, this code should all be inside the context
183 # TODO strictly speaking, this code should all be inside the context
184 # manager because the context manager is supposed to ensure all wire state
184 # manager because the context manager is supposed to ensure all wire state
185 # is flushed when exiting. But the legacy peers don't do this, so it
185 # is flushed when exiting. But the legacy peers don't do this, so it
186 # doesn't matter.
186 # doesn't matter.
187 l = fp.readline()
187 l = fp.readline()
188 try:
188 try:
189 resp = int(l)
189 resp = int(l)
190 except ValueError:
190 except ValueError:
191 raise error.ResponseError(
191 raise error.ResponseError(
192 _(b'unexpected response from remote server:'), l
192 _(b'unexpected response from remote server:'), l
193 )
193 )
194 if resp == 1:
194 if resp == 1:
195 raise error.Abort(_(b'operation forbidden by server'))
195 raise error.Abort(_(b'operation forbidden by server'))
196 elif resp == 2:
196 elif resp == 2:
197 raise error.Abort(_(b'locking the remote repository failed'))
197 raise error.Abort(_(b'locking the remote repository failed'))
198 elif resp != 0:
198 elif resp != 0:
199 raise error.Abort(_(b'the server sent an unknown error code'))
199 raise error.Abort(_(b'the server sent an unknown error code'))
200
200
201 l = fp.readline()
201 l = fp.readline()
202 try:
202 try:
203 filecount, bytecount = map(int, l.split(b' ', 1))
203 filecount, bytecount = map(int, l.split(b' ', 1))
204 except (ValueError, TypeError):
204 except (ValueError, TypeError):
205 raise error.ResponseError(
205 raise error.ResponseError(
206 _(b'unexpected response from remote server:'), l
206 _(b'unexpected response from remote server:'), l
207 )
207 )
208
208
209 with repo.lock():
209 with repo.lock():
210 consumev1(repo, fp, filecount, bytecount)
210 consumev1(repo, fp, filecount, bytecount)
211 repo.requirements = new_stream_clone_requirements(
211 repo.requirements = new_stream_clone_requirements(
212 repo.requirements,
212 repo.requirements,
213 requirements,
213 requirements,
214 )
214 )
215 repo.svfs.options = localrepo.resolvestorevfsoptions(
215 repo.svfs.options = localrepo.resolvestorevfsoptions(
216 repo.ui, repo.requirements, repo.features
216 repo.ui, repo.requirements, repo.features
217 )
217 )
218 scmutil.writereporequirements(repo)
218 scmutil.writereporequirements(repo)
219 nodemap.post_stream_cleanup(repo)
219 nodemap.post_stream_cleanup(repo)
220
220
221 if rbranchmap:
221 if rbranchmap:
222 repo._branchcaches.replace(repo, rbranchmap)
222 repo._branchcaches.replace(repo, rbranchmap)
223
223
224 repo.invalidate()
224 repo.invalidate()
225
225
226
226
227 def allowservergeneration(repo):
227 def allowservergeneration(repo):
228 """Whether streaming clones are allowed from the server."""
228 """Whether streaming clones are allowed from the server."""
229 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
229 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
230 return False
230 return False
231
231
232 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
232 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
233 return False
233 return False
234
234
235 # The way stream clone works makes it impossible to hide secret changesets.
235 # The way stream clone works makes it impossible to hide secret changesets.
236 # So don't allow this by default.
236 # So don't allow this by default.
237 secret = phases.hassecret(repo)
237 secret = phases.hassecret(repo)
238 if secret:
238 if secret:
239 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
239 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
240
240
241 return True
241 return True
242
242
243
243
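
The checks above map to two configuration knobs; for example, a server opting in to stream clones while still refusing them when secret changesets exist could use an hgrc along these lines:

[server]
# allow stream clone generation at all
uncompressed = yes
# keep refusing when secret changesets are present (the default)
uncompressedallowsecret = no
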
244 # This is its own function so extensions can override it.
244 # This is its own function so extensions can override it.
245 def _walkstreamfiles(repo, matcher=None, phase=False, obsolescence=False):
245 def _walkstreamfiles(repo, matcher=None, phase=False, obsolescence=False):
246 return repo.store.walk(matcher, phase=phase, obsolescence=obsolescence)
246 return repo.store.walk(matcher, phase=phase, obsolescence=obsolescence)
247
247
248
248
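
Because the hook above stays overridable, an extension can wrap it; a hedged sketch using the standard wrapfunction mechanism (the pass-through wrapper here is purely illustrative):

# Illustrative extension snippet: wrap _walkstreamfiles to observe (or
# filter) what gets streamed. The wrapper below just passes through.
from mercurial import extensions, streamclone

def _wrappedwalk(orig, repo, matcher=None, phase=False, obsolescence=False):
    for entry in orig(repo, matcher, phase=phase, obsolescence=obsolescence):
        yield entry  # a real extension could skip or log entries here

def extsetup(ui):
    extensions.wrapfunction(streamclone, '_walkstreamfiles', _wrappedwalk)
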
249 def generatev1(repo):
249 def generatev1(repo):
250 """Emit content for version 1 of a streaming clone.
250 """Emit content for version 1 of a streaming clone.
251
251
252 This returns a 3-tuple of (file count, byte size, data iterator).
252 This returns a 3-tuple of (file count, byte size, data iterator).
253
253
254 The data iterator consists of N entries, one for each file being transferred.
254 The data iterator consists of N entries, one for each file being transferred.
255 Each file entry starts as a line with the file name and integer size
255 Each file entry starts as a line with the file name and integer size
256 delimited by a null byte.
256 delimited by a null byte.
257
257
258 The raw file data follows. Following the raw file data is the next file
258 The raw file data follows. Following the raw file data is the next file
259 entry, or EOF.
259 entry, or EOF.
260
260
261 When used on the wire protocol, an additional line indicating protocol
261 When used on the wire protocol, an additional line indicating protocol
262 success will be prepended to the stream. This function is not responsible
262 success will be prepended to the stream. This function is not responsible
263 for adding it.
263 for adding it.
264
264
265 This function will obtain a repository lock to ensure a consistent view of
265 This function will obtain a repository lock to ensure a consistent view of
266 the store is captured. It therefore may raise LockError.
266 the store is captured. It therefore may raise LockError.
267 """
267 """
268 entries = []
268 entries = []
269 total_bytes = 0
269 total_bytes = 0
270 # Get consistent snapshot of repo, lock during scan.
270 # Get consistent snapshot of repo, lock during scan.
271 with repo.lock():
271 with repo.lock():
272 repo.ui.debug(b'scanning\n')
272 repo.ui.debug(b'scanning\n')
273 for entry in _walkstreamfiles(repo):
273 for entry in _walkstreamfiles(repo):
274 for f in entry.files():
274 for f in entry.files():
275 file_size = f.file_size(repo.store.vfs)
275 file_size = f.file_size(repo.store.vfs)
276 if file_size:
276 if file_size:
277 entries.append((f.unencoded_path, file_size))
277 entries.append((f.unencoded_path, file_size))
278 total_bytes += file_size
278 total_bytes += file_size
279 _test_sync_point_walk_1(repo)
279 _test_sync_point_walk_1(repo)
280 _test_sync_point_walk_2(repo)
280 _test_sync_point_walk_2(repo)
281
281
282 repo.ui.debug(
282 repo.ui.debug(
283 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
283 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
284 )
284 )
285
285
286 svfs = repo.svfs
286 svfs = repo.svfs
287 debugflag = repo.ui.debugflag
287 debugflag = repo.ui.debugflag
288
288
289 def emitrevlogdata():
289 def emitrevlogdata():
290 for name, size in entries:
290 for name, size in entries:
291 if debugflag:
291 if debugflag:
292 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
292 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
293 # partially encode name over the wire for backwards compat
293 # partially encode name over the wire for backwards compat
294 yield b'%s\0%d\n' % (store.encodedir(name), size)
294 yield b'%s\0%d\n' % (store.encodedir(name), size)
295 # auditing at this stage is both pointless (paths are already
295 # auditing at this stage is both pointless (paths are already
296 # trusted by the local repo) and expensive
296 # trusted by the local repo) and expensive
297 with svfs(name, b'rb', auditpath=False) as fp:
297 with svfs(name, b'rb', auditpath=False) as fp:
298 if size <= 65536:
298 if size <= 65536:
299 yield fp.read(size)
299 yield fp.read(size)
300 else:
300 else:
301 for chunk in util.filechunkiter(fp, limit=size):
301 for chunk in util.filechunkiter(fp, limit=size):
302 yield chunk
302 yield chunk
303
303
304 return len(entries), total_bytes, emitrevlogdata()
304 return len(entries), total_bytes, emitrevlogdata()
305
305
306
306
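
A minimal sketch of parsing one v1 entry from the framing described in the docstring above (a b'<name>\0<size>\n' line followed by the raw data); the real consumer is consumev1() further down.

import io

def read_v1_entry(fp):
    # header line: name and decimal size separated by a null byte
    line = fp.readline()
    name, size = line.split(b'\0', 1)
    return name, fp.read(int(size))

stream = io.BytesIO(b'data/foo.i\x005\nhello')
assert read_v1_entry(stream) == (b'data/foo.i', b'hello')
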
307 def generatev1wireproto(repo):
307 def generatev1wireproto(repo):
308 """Emit content for version 1 of streaming clone suitable for the wire.
308 """Emit content for version 1 of streaming clone suitable for the wire.
309
309
310 This is the data output from ``generatev1()`` with 2 header lines. The
310 This is the data output from ``generatev1()`` with 2 header lines. The
311 first line indicates overall success. The 2nd contains the file count and
311 first line indicates overall success. The 2nd contains the file count and
312 byte size of payload.
312 byte size of payload.
313
313
314 The success line contains "0" for success, "1" for stream generation not
314 The success line contains "0" for success, "1" for stream generation not
315 allowed, and "2" for error locking the repository (possibly indicating
315 allowed, and "2" for error locking the repository (possibly indicating
316 a permissions error for the server process).
316 a permissions error for the server process).
317 """
317 """
318 if not allowservergeneration(repo):
318 if not allowservergeneration(repo):
319 yield b'1\n'
319 yield b'1\n'
320 return
320 return
321
321
322 try:
322 try:
323 filecount, bytecount, it = generatev1(repo)
323 filecount, bytecount, it = generatev1(repo)
324 except error.LockError:
324 except error.LockError:
325 yield b'2\n'
325 yield b'2\n'
326 return
326 return
327
327
328 # Indicates successful response.
328 # Indicates successful response.
329 yield b'0\n'
329 yield b'0\n'
330 yield b'%d %d\n' % (filecount, bytecount)
330 yield b'%d %d\n' % (filecount, bytecount)
331 for chunk in it:
331 for chunk in it:
332 yield chunk
332 yield chunk
333
333
334
334
335 def generatebundlev1(repo, compression=b'UN'):
335 def generatebundlev1(repo, compression=b'UN'):
336 """Emit content for version 1 of a stream clone bundle.
336 """Emit content for version 1 of a stream clone bundle.
337
337
338 The first 4 bytes of the output ("HGS1") denote this as stream clone
338 The first 4 bytes of the output ("HGS1") denote this as stream clone
339 bundle version 1.
339 bundle version 1.
340
340
341 The next 2 bytes indicate the compression type. Only "UN" is currently
341 The next 2 bytes indicate the compression type. Only "UN" is currently
342 supported.
342 supported.
343
343
344 The next 16 bytes are two 64-bit big endian unsigned integers indicating
344 The next 16 bytes are two 64-bit big endian unsigned integers indicating
345 file count and byte count, respectively.
345 file count and byte count, respectively.
346
346
347 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
347 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
348 of the requirements string, including a trailing \0. The following N bytes
348 of the requirements string, including a trailing \0. The following N bytes
349 are the requirements string, which is ASCII containing a comma-delimited
349 are the requirements string, which is ASCII containing a comma-delimited
350 list of repo requirements that are needed to support the data.
350 list of repo requirements that are needed to support the data.
351
351
352 The remaining content is the output of ``generatev1()`` (which may be
352 The remaining content is the output of ``generatev1()`` (which may be
353 compressed in the future).
353 compressed in the future).
354
354
355 Returns a tuple of (requirements, data generator).
355 Returns a tuple of (requirements, data generator).
356 """
356 """
357 if compression != b'UN':
357 if compression != b'UN':
358 raise ValueError(b'we do not support the compression argument yet')
358 raise ValueError(b'we do not support the compression argument yet')
359
359
360 requirements = streamed_requirements(repo)
360 requirements = streamed_requirements(repo)
361 requires = b','.join(sorted(requirements))
361 requires = b','.join(sorted(requirements))
362
362
363 def gen():
363 def gen():
364 yield b'HGS1'
364 yield b'HGS1'
365 yield compression
365 yield compression
366
366
367 filecount, bytecount, it = generatev1(repo)
367 filecount, bytecount, it = generatev1(repo)
368 repo.ui.status(
368 repo.ui.status(
369 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
369 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
370 )
370 )
371
371
372 yield struct.pack(b'>QQ', filecount, bytecount)
372 yield struct.pack(b'>QQ', filecount, bytecount)
373 yield struct.pack(b'>H', len(requires) + 1)
373 yield struct.pack(b'>H', len(requires) + 1)
374 yield requires + b'\0'
374 yield requires + b'\0'
375
375
376 # This is where we'll add compression in the future.
376 # This is where we'll add compression in the future.
377 assert compression == b'UN'
377 assert compression == b'UN'
378
378
379 progress = repo.ui.makeprogress(
379 progress = repo.ui.makeprogress(
380 _(b'bundle'), total=bytecount, unit=_(b'bytes')
380 _(b'bundle'), total=bytecount, unit=_(b'bytes')
381 )
381 )
382 progress.update(0)
382 progress.update(0)
383
383
384 for chunk in it:
384 for chunk in it:
385 progress.increment(step=len(chunk))
385 progress.increment(step=len(chunk))
386 yield chunk
386 yield chunk
387
387
388 progress.complete()
388 progress.complete()
389
389
390 return requirements, gen()
390 return requirements, gen()
391
391
392
392
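
A sketch of packing the fixed-size header documented above; readbundle1header() further down is the matching reader.

import struct

def pack_bundle1_header(filecount, bytecount, requirements):
    # magic, compression, two uint64 counts, then the length-prefixed,
    # null-terminated requirements string
    requires = b','.join(sorted(requirements)) + b'\0'
    return (
        b'HGS1'
        + b'UN'
        + struct.pack(b'>QQ', filecount, bytecount)
        + struct.pack(b'>H', len(requires))
        + requires
    )

hdr = pack_bundle1_header(3, 4096, {b'revlogv1'})
assert hdr[:6] == b'HGS1UN' and hdr.endswith(b'revlogv1\0')
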
393 def consumev1(repo, fp, filecount, bytecount):
393 def consumev1(repo, fp, filecount, bytecount):
394 """Apply the contents from version 1 of a streaming clone file handle.
394 """Apply the contents from version 1 of a streaming clone file handle.
395
395
396 This takes the output from "stream_out" and applies it to the specified
396 This takes the output from "stream_out" and applies it to the specified
397 repository.
397 repository.
398
398
399 Like "stream_out," the status line added by the wire protocol is not
399 Like "stream_out," the status line added by the wire protocol is not
400 handled by this function.
400 handled by this function.
401 """
401 """
402 with repo.lock():
402 with repo.lock():
403 repo.ui.status(
403 repo.ui.status(
404 _(b'%d files to transfer, %s of data\n')
404 _(b'%d files to transfer, %s of data\n')
405 % (filecount, util.bytecount(bytecount))
405 % (filecount, util.bytecount(bytecount))
406 )
406 )
407 progress = repo.ui.makeprogress(
407 progress = repo.ui.makeprogress(
408 _(b'clone'), total=bytecount, unit=_(b'bytes')
408 _(b'clone'), total=bytecount, unit=_(b'bytes')
409 )
409 )
410 progress.update(0)
410 progress.update(0)
411 start = util.timer()
411 start = util.timer()
412
412
413 # TODO: get rid of (potential) inconsistency
413 # TODO: get rid of (potential) inconsistency
414 #
414 #
415 # If transaction is started and any @filecache property is
415 # If transaction is started and any @filecache property is
416 # changed at this point, it causes inconsistency between
416 # changed at this point, it causes inconsistency between
417 # in-memory cached property and streamclone-ed file on the
417 # in-memory cached property and streamclone-ed file on the
418 # disk. Nested transaction prevents transaction scope "clone"
418 # disk. Nested transaction prevents transaction scope "clone"
419 # below from writing in-memory changes out at the end of it,
419 # below from writing in-memory changes out at the end of it,
420 # even though in-memory changes are discarded at the end of it
420 # even though in-memory changes are discarded at the end of it
421 # regardless of transaction nesting.
421 # regardless of transaction nesting.
422 #
422 #
423 # But transaction nesting can't be simply prohibited, because
423 # But transaction nesting can't be simply prohibited, because
424 # nesting occurs also in ordinary case (e.g. enabling
424 # nesting occurs also in ordinary case (e.g. enabling
425 # clonebundles).
425 # clonebundles).
426
426
427 with repo.transaction(b'clone'):
427 with repo.transaction(b'clone'):
428 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
428 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
429 for i in range(filecount):
429 for i in range(filecount):
430 # XXX doesn't support '\n' or '\r' in filenames
430 # XXX doesn't support '\n' or '\r' in filenames
431 l = fp.readline()
431 l = fp.readline()
432 try:
432 try:
433 name, size = l.split(b'\0', 1)
433 name, size = l.split(b'\0', 1)
434 size = int(size)
434 size = int(size)
435 except (ValueError, TypeError):
435 except (ValueError, TypeError):
436 raise error.ResponseError(
436 raise error.ResponseError(
437 _(b'unexpected response from remote server:'), l
437 _(b'unexpected response from remote server:'), l
438 )
438 )
439 if repo.ui.debugflag:
439 if repo.ui.debugflag:
440 repo.ui.debug(
440 repo.ui.debug(
441 b'adding %s (%s)\n' % (name, util.bytecount(size))
441 b'adding %s (%s)\n' % (name, util.bytecount(size))
442 )
442 )
443 # for backwards compat, name was partially encoded
443 # for backwards compat, name was partially encoded
444 path = store.decodedir(name)
444 path = store.decodedir(name)
445 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
445 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
446 for chunk in util.filechunkiter(fp, limit=size):
446 for chunk in util.filechunkiter(fp, limit=size):
447 progress.increment(step=len(chunk))
447 progress.increment(step=len(chunk))
448 ofp.write(chunk)
448 ofp.write(chunk)
449
449
450 # force @filecache properties to be reloaded from
450 # force @filecache properties to be reloaded from
451 # streamclone-ed file at next access
451 # streamclone-ed file at next access
452 repo.invalidate(clearfilecache=True)
452 repo.invalidate(clearfilecache=True)
453
453
454 elapsed = util.timer() - start
454 elapsed = util.timer() - start
455 if elapsed <= 0:
455 if elapsed <= 0:
456 elapsed = 0.001
456 elapsed = 0.001
457 progress.complete()
457 progress.complete()
458 repo.ui.status(
458 repo.ui.status(
459 _(b'transferred %s in %.1f seconds (%s/sec)\n')
459 _(b'transferred %s in %.1f seconds (%s/sec)\n')
460 % (
460 % (
461 util.bytecount(bytecount),
461 util.bytecount(bytecount),
462 elapsed,
462 elapsed,
463 util.bytecount(bytecount / elapsed),
463 util.bytecount(bytecount / elapsed),
464 )
464 )
465 )
465 )
466
466
467
467
468 def readbundle1header(fp):
468 def readbundle1header(fp):
469 compression = fp.read(2)
469 compression = fp.read(2)
470 if compression != b'UN':
470 if compression != b'UN':
471 raise error.Abort(
471 raise error.Abort(
472 _(
472 _(
473 b'only uncompressed stream clone bundles are '
473 b'only uncompressed stream clone bundles are '
474 b'supported; got %s'
474 b'supported; got %s'
475 )
475 )
476 % compression
476 % compression
477 )
477 )
478
478
479 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
479 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
480 requireslen = struct.unpack(b'>H', fp.read(2))[0]
480 requireslen = struct.unpack(b'>H', fp.read(2))[0]
481 requires = fp.read(requireslen)
481 requires = fp.read(requireslen)
482
482
483 if not requires.endswith(b'\0'):
483 if not requires.endswith(b'\0'):
484 raise error.Abort(
484 raise error.Abort(
485 _(
485 _(
486 b'malformed stream clone bundle: '
486 b'malformed stream clone bundle: '
487 b'requirements not properly encoded'
487 b'requirements not properly encoded'
488 )
488 )
489 )
489 )
490
490
491 requirements = set(requires.rstrip(b'\0').split(b','))
491 requirements = set(requires.rstrip(b'\0').split(b','))
492
492
493 return filecount, bytecount, requirements
493 return filecount, bytecount, requirements
494
494
495
495
496 def applybundlev1(repo, fp):
496 def applybundlev1(repo, fp):
497 """Apply the content from a stream clone bundle version 1.
497 """Apply the content from a stream clone bundle version 1.
498
498
499 We assume the 4 byte header has been read and validated and the file handle
499 We assume the 4 byte header has been read and validated and the file handle
500 is at the 2 byte compression identifier.
500 is at the 2 byte compression identifier.
501 """
501 """
502 if len(repo):
502 if len(repo):
503 raise error.Abort(
503 raise error.Abort(
504 _(b'cannot apply stream clone bundle on non-empty repo')
504 _(b'cannot apply stream clone bundle on non-empty repo')
505 )
505 )
506
506
507 filecount, bytecount, requirements = readbundle1header(fp)
507 filecount, bytecount, requirements = readbundle1header(fp)
508 missingreqs = requirements - repo.supported
508 missingreqs = requirements - repo.supported
509 if missingreqs:
509 if missingreqs:
510 raise error.Abort(
510 raise error.Abort(
511 _(b'unable to apply stream clone: unsupported format: %s')
511 _(b'unable to apply stream clone: unsupported format: %s')
512 % b', '.join(sorted(missingreqs))
512 % b', '.join(sorted(missingreqs))
513 )
513 )
514
514
515 consumev1(repo, fp, filecount, bytecount)
515 consumev1(repo, fp, filecount, bytecount)
516 nodemap.post_stream_cleanup(repo)
516 nodemap.post_stream_cleanup(repo)
517
517
518
518
519 class streamcloneapplier:
519 class streamcloneapplier:
520 """Class to manage applying streaming clone bundles.
520 """Class to manage applying streaming clone bundles.
521
521
522 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
522 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
523 readers to perform bundle type-specific functionality.
523 readers to perform bundle type-specific functionality.
524 """
524 """
525
525
526 def __init__(self, fh):
526 def __init__(self, fh):
527 self._fh = fh
527 self._fh = fh
528
528
529 def apply(self, repo):
529 def apply(self, repo):
530 return applybundlev1(repo, self._fh)
530 return applybundlev1(repo, self._fh)
531
531
532
532
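
A hedged usage sketch for the wrapper above, consistent with applybundlev1()'s precondition that the 4-byte magic has already been read; the helper name and the `repo`/`fh` arguments are assumptions of this example.

def apply_stream_bundle(repo, fh):
    # validate the magic ourselves, then hand over the rest of the file
    magic = fh.read(4)
    if magic != b'HGS1':
        raise error.Abort(b'not a stream clone bundle v1: %s' % magic)
    return streamcloneapplier(fh).apply(repo)
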
533 # type of file to stream
533 # type of file to stream
534 _fileappend = 0 # append only file
534 _fileappend = 0 # append only file
535 _filefull = 1 # full snapshot file
535 _filefull = 1 # full snapshot file
536
536
537 # Source of the file
537 # Source of the file
538 _srcstore = b's' # store (svfs)
538 _srcstore = b's' # store (svfs)
539 _srccache = b'c' # cache (cache)
539 _srccache = b'c' # cache (cache)
540
540
541 # This is its own function so extensions can override it.
541 # This is its own function so extensions can override it.
542 def _walkstreamfullstorefiles(repo):
542 def _walkstreamfullstorefiles(repo):
543 """list snapshot file from the store"""
543 """list snapshot file from the store"""
544 fnames = []
544 fnames = []
545 if not repo.publishing():
545 if not repo.publishing():
546 fnames.append(b'phaseroots')
546 fnames.append(b'phaseroots')
547 return fnames
547 return fnames
548
548
549
549
550 def _filterfull(entry, copy, vfsmap):
550 def _filterfull(entry, copy, vfsmap):
551 """actually copy the snapshot files"""
551 """actually copy the snapshot files"""
552 src, name, ftype, data = entry
552 src, name, ftype, data = entry
553 if ftype != _filefull:
553 if ftype != _filefull:
554 return entry
554 return entry
555 return (src, name, ftype, copy(vfsmap[src].join(name)))
555 return (src, name, ftype, copy(vfsmap[src].join(name)))
556
556
557
557
558 class TempCopyManager:
558 class TempCopyManager:
559 """Manage temporary backup of volatile file during stream clone
559 """Manage temporary backup of volatile file during stream clone
560
560
561 This should be used as a Python context manager; the copies will be discarded when
561 This should be used as a Python context manager; the copies will be discarded when
562 exiting the context.
562 exiting the context.
563
563
564 A copy can be done by calling the object on the real path (encoded full
564 A copy can be done by calling the object on the real path (encoded full
565 path)
565 path)
566
566
567 The backup path can be retrieved using the __getitem__ protocol, obj[path].
567 The backup path can be retrieved using the __getitem__ protocol, obj[path].
568 On a file without backup, it will return the unmodified path (equivalent to
568 On a file without backup, it will return the unmodified path (equivalent to
569 `dict.get(x, x)`)
569 `dict.get(x, x)`)
570 """
570 """
571
571
572 def __init__(self):
572 def __init__(self):
573 self._copies = None
573 self._copies = None
574 self._dst_dir = None
574 self._dst_dir = None
575
575
576 def __enter__(self):
576 def __enter__(self):
577 if self._copies is not None:
577 if self._copies is not None:
578 msg = "Copies context already open"
578 msg = "Copies context already open"
579 raise error.ProgrammingError(msg)
579 raise error.ProgrammingError(msg)
580 self._copies = {}
580 self._copies = {}
581 self._dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
581 self._dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
582 return self
582 return self
583
583
584 def __call__(self, src):
584 def __call__(self, src):
585 """create a backup of the file at src"""
585 """create a backup of the file at src"""
586 prefix = os.path.basename(src)
586 prefix = os.path.basename(src)
587 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
587 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
588 os.close(fd)
588 os.close(fd)
589 self._copies[src] = dst
589 self._copies[src] = dst
590 util.copyfiles(src, dst, hardlink=True)
590 util.copyfiles(src, dst, hardlink=True)
591 return dst
591 return dst
592
592
593 def __getitem__(self, src):
593 def __getitem__(self, src):
594 """return the path to a valid version of `src`
594 """return the path to a valid version of `src`
595
595
596 If the file has no backup, the path of the file is returned
596 If the file has no backup, the path of the file is returned
597 unmodified."""
597 unmodified."""
598 return self._copies.get(src, src)
598 return self._copies.get(src, src)
599
599
600 def __exit__(self, *args, **kwargs):
600 def __exit__(self, *args, **kwargs):
601 """discard all backups"""
601 """discard all backups"""
602 for tmp in self._copies.values():
602 for tmp in self._copies.values():
603 util.tryunlink(tmp)
603 util.tryunlink(tmp)
604 util.tryrmdir(self._dst_dir)
604 util.tryrmdir(self._dst_dir)
605 self._copies = None
605 self._copies = None
606 self._dst_dir = None
606 self._dst_dir = None
607
607
608
608
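
A usage sketch for TempCopyManager following its docstring; the temporary file created here exists only to make the example self-contained.

import os
import tempfile

fd, volatile = tempfile.mkstemp()
os.write(fd, b'volatile data')
os.close(fd)
path = volatile.encode()

with TempCopyManager() as copies:
    copies(path)                           # snapshot the volatile file
    assert copies[path] != path            # backed up -> temporary copy path
    assert copies[b'/other'] == b'/other'  # no backup -> unchanged path
# leaving the context discards the temporary copies
os.unlink(volatile)
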
609 def _makemap(repo):
609 def _makemap(repo):
610 """make a (src -> vfs) map for the repo"""
610 """make a (src -> vfs) map for the repo"""
611 vfsmap = {
611 vfsmap = {
612 _srcstore: repo.svfs,
612 _srcstore: repo.svfs,
613 _srccache: repo.cachevfs,
613 _srccache: repo.cachevfs,
614 }
614 }
615 # we keep repo.vfs out of the map on purpose, there are too many dangers there
615 # we keep repo.vfs out of the map on purpose, there are too many dangers there
616 # (eg: .hg/hgrc)
616 # (eg: .hg/hgrc)
617 assert repo.vfs not in vfsmap.values()
617 assert repo.vfs not in vfsmap.values()
618
618
619 return vfsmap
619 return vfsmap
620
620
621
621
622 def _emit2(repo, entries):
622 def _emit2(repo, entries):
623 """actually emit the stream bundle"""
623 """actually emit the stream bundle"""
624 vfsmap = _makemap(repo)
624 vfsmap = _makemap(repo)
625 # we keep repo.vfs out of the map on purpose, there are too many dangers there
625 # we keep repo.vfs out of the map on purpose, there are too many dangers there
626 # (eg: .hg/hgrc),
626 # (eg: .hg/hgrc),
627 #
627 #
628 # this assert is duplicated (from _makemap) as an author might think this is
628 # this assert is duplicated (from _makemap) as an author might think this is
629 # fine, while this is really not fine.
629 # fine, while this is really not fine.
630 if repo.vfs in vfsmap.values():
630 if repo.vfs in vfsmap.values():
631 raise error.ProgrammingError(
631 raise error.ProgrammingError(
632 b'repo.vfs must not be added to vfsmap for security reasons'
632 b'repo.vfs must not be added to vfsmap for security reasons'
633 )
633 )
634
634
635 # translate each vfs key to its vfs object
635 # translate each vfs key to its vfs object
636 entries = [(vfs_key, vfsmap[vfs_key], e) for (vfs_key, e) in entries]
636 entries = [(vfs_key, vfsmap[vfs_key], e) for (vfs_key, e) in entries]
637
637
638 max_linkrev = len(repo)
638 file_count = totalfilesize = 0
639 file_count = totalfilesize = 0
639 # record the expected size of every file
640 # record the expected size of every file
640 for k, vfs, e in entries:
641 for k, vfs, e in entries:
641 for f in e.files():
642 for f in e.files():
642 file_count += 1
643 file_count += 1
643 totalfilesize += f.file_size(vfs)
644 totalfilesize += f.file_size(vfs)
644
645
645 progress = repo.ui.makeprogress(
646 progress = repo.ui.makeprogress(
646 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
647 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
647 )
648 )
648 progress.update(0)
649 progress.update(0)
649 with TempCopyManager() as copy, progress:
650 with TempCopyManager() as copy, progress:
650 # create a copy of volatile files
651 # create a copy of volatile files
651 for k, vfs, e in entries:
652 for k, vfs, e in entries:
652 for f in e.files():
653 for f in e.files():
653 if f.is_volatile:
654 if f.is_volatile:
654 copy(vfs.join(f.unencoded_path))
655 copy(vfs.join(f.unencoded_path))
655 # the first yield releases the lock on the repository
656 # the first yield releases the lock on the repository
656 yield file_count, totalfilesize
657 yield file_count, totalfilesize
657 totalbytecount = 0
658 totalbytecount = 0
658
659
659 for src, vfs, e in entries:
660 for src, vfs, e in entries:
660 for name, stream, size in e.get_streams(vfs, copies=copy):
661 entry_streams = e.get_streams(
662 repo=repo, vfs=vfs, copies=copy, max_changeset=max_linkrev
663 )
664 for name, stream, size in entry_streams:
661 yield src
665 yield src
662 yield util.uvarintencode(len(name))
666 yield util.uvarintencode(len(name))
663 yield util.uvarintencode(size)
667 yield util.uvarintencode(size)
664 yield name
668 yield name
665 bytecount = 0
669 bytecount = 0
666 for chunk in stream:
670 for chunk in stream:
667 bytecount += len(chunk)
671 bytecount += len(chunk)
668 totalbytecount += len(chunk)
672 totalbytecount += len(chunk)
669 progress.update(totalbytecount)
673 progress.update(totalbytecount)
670 yield chunk
674 yield chunk
671 if bytecount != size:
675 if bytecount != size:
672 # Would most likely be caused by a race due to `hg
676 # Would most likely be caused by a race due to `hg
673 # strip` or a revlog split
677 # strip` or a revlog split
674 msg = _(
678 msg = _(
675 b'clone could only read %d bytes from %s, but '
679 b'clone could only read %d bytes from %s, but '
676 b'expected %d bytes'
680 b'expected %d bytes'
677 )
681 )
678 raise error.Abort(msg % (bytecount, name, size))
682 raise error.Abort(msg % (bytecount, name, size))
679
683
680
684
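
The emission loop above leans on the per-entry get_streams() contract introduced by this change: each entry yields (name, chunk iterator, size) triples, and max_changeset caps the revisions considered so changesets added after the initial scan are not streamed. A minimal stand-in illustrating just the shape of that contract:

# _FakeEntry is a made-up class; only the yielded triple shape matters.
class _FakeEntry:
    def __init__(self, name, data):
        self._name = name
        self._data = data

    def get_streams(self, repo=None, vfs=None, copies=None, max_changeset=None):
        yield self._name, iter([self._data]), len(self._data)

[(name, stream, size)] = list(_FakeEntry(b'data/foo.i', b'xyz').get_streams())
assert (name, size, b''.join(stream)) == (b'data/foo.i', 3, b'xyz')
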
681 def _test_sync_point_walk_1(repo):
685 def _test_sync_point_walk_1(repo):
682 """a function for synchronisation during tests"""
686 """a function for synchronisation during tests"""
683
687
684
688
685 def _test_sync_point_walk_2(repo):
689 def _test_sync_point_walk_2(repo):
686 """a function for synchronisation during tests"""
690 """a function for synchronisation during tests"""
687
691
688
692
689 def _entries_walk(repo, includes, excludes, includeobsmarkers):
693 def _entries_walk(repo, includes, excludes, includeobsmarkers):
690 """emit a seris of files information useful to clone a repo
694 """emit a seris of files information useful to clone a repo
691
695
692 returns a (vfs-key, entry) iterator
696 returns a (vfs-key, entry) iterator
693
697
694 Where `entry` is a StoreEntry (used even for cache entries).
698 Where `entry` is a StoreEntry (used even for cache entries).
695 """
699 """
696 assert repo._currentlock(repo._lockref) is not None
700 assert repo._currentlock(repo._lockref) is not None
697
701
698 matcher = None
702 matcher = None
699 if includes or excludes:
703 if includes or excludes:
700 matcher = narrowspec.match(repo.root, includes, excludes)
704 matcher = narrowspec.match(repo.root, includes, excludes)
701
705
702 phase = not repo.publishing()
706 phase = not repo.publishing()
703 entries = _walkstreamfiles(
707 entries = _walkstreamfiles(
704 repo,
708 repo,
705 matcher,
709 matcher,
706 phase=phase,
710 phase=phase,
707 obsolescence=includeobsmarkers,
711 obsolescence=includeobsmarkers,
708 )
712 )
709 for entry in entries:
713 for entry in entries:
710 yield (_srcstore, entry)
714 yield (_srcstore, entry)
711
715
712 for name in cacheutil.cachetocopy(repo):
716 for name in cacheutil.cachetocopy(repo):
713 if repo.cachevfs.exists(name):
717 if repo.cachevfs.exists(name):
714 # not really a StoreEntry, but close enough
718 # not really a StoreEntry, but close enough
715 entry = store.SimpleStoreEntry(
719 entry = store.SimpleStoreEntry(
716 entry_path=name,
720 entry_path=name,
717 is_volatile=True,
721 is_volatile=True,
718 )
722 )
719 yield (_srccache, entry)
723 yield (_srccache, entry)
720
724
721
725
722 def generatev2(repo, includes, excludes, includeobsmarkers):
726 def generatev2(repo, includes, excludes, includeobsmarkers):
723 """Emit content for version 2 of a streaming clone.
727 """Emit content for version 2 of a streaming clone.
724
728
725 the data stream consists of the following entries:
729 the data stream consists of the following entries:
726 1) A char representing the file destination (eg: store or cache)
730 1) A char representing the file destination (eg: store or cache)
727 2) A varint containing the length of the filename
731 2) A varint containing the length of the filename
728 3) A varint containing the length of file data
732 3) A varint containing the length of file data
729 4) N bytes containing the filename (the internal, store-agnostic form)
733 4) N bytes containing the filename (the internal, store-agnostic form)
730 5) N bytes containing the file data
734 5) N bytes containing the file data
731
735
732 Returns a 3-tuple of (file count, file size, data iterator).
736 Returns a 3-tuple of (file count, file size, data iterator).
733 """
737 """
734
738
735 with repo.lock():
739 with repo.lock():
736
740
737 repo.ui.debug(b'scanning\n')
741 repo.ui.debug(b'scanning\n')
738
742
739 entries = _entries_walk(
743 entries = _entries_walk(
740 repo,
744 repo,
741 includes=includes,
745 includes=includes,
742 excludes=excludes,
746 excludes=excludes,
743 includeobsmarkers=includeobsmarkers,
747 includeobsmarkers=includeobsmarkers,
744 )
748 )
745
749
746 chunks = _emit2(repo, entries)
750 chunks = _emit2(repo, entries)
747 first = next(chunks)
751 first = next(chunks)
748 file_count, total_file_size = first
752 file_count, total_file_size = first
749 _test_sync_point_walk_1(repo)
753 _test_sync_point_walk_1(repo)
750 _test_sync_point_walk_2(repo)
754 _test_sync_point_walk_2(repo)
751
755
752 return file_count, total_file_size, chunks
756 return file_count, total_file_size, chunks
753
757
754
758
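
A sketch of decoding one entry header of the five-field v2/v3 framing listed in the generatev2 docstring; the varint codec below is a simplified stand-in for util.uvarintencode/uvarintdecodestream (LSB-first, 7 data bits per byte, high bit as continuation).

import io

def uvarint_decode(fp):
    result = shift = 0
    while True:
        byte = fp.read(1)[0]
        result |= (byte & 0x7F) << shift
        if not byte & 0x80:
            return result
        shift += 7

def read_v2_entry_header(fp):
    src = fp.read(1)                  # b's' (store) or b'c' (cache)
    namelen = uvarint_decode(fp)
    datalen = uvarint_decode(fp)
    name = fp.read(namelen)
    return src, name, datalen         # datalen bytes of file data follow

stream = io.BytesIO(b's' + bytes([10, 3]) + b'data/foo.ixyz')
assert read_v2_entry_header(stream) == (b's', b'data/foo.i', 3)
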
755 def generatev3(repo, includes, excludes, includeobsmarkers):
759 def generatev3(repo, includes, excludes, includeobsmarkers):
756 return generatev2(repo, includes, excludes, includeobsmarkers)
760 return generatev2(repo, includes, excludes, includeobsmarkers)
757
761
758
762
759 @contextlib.contextmanager
763 @contextlib.contextmanager
760 def nested(*ctxs):
764 def nested(*ctxs):
761 this = ctxs[0]
765 this = ctxs[0]
762 rest = ctxs[1:]
766 rest = ctxs[1:]
763 with this:
767 with this:
764 if rest:
768 if rest:
765 with nested(*rest):
769 with nested(*rest):
766 yield
770 yield
767 else:
771 else:
768 yield
772 yield
769
773
770
774
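
A quick illustration of nested(): it chains an arbitrary number of context managers, entering left to right and exiting in reverse, like a variadic version of nested with statements.

import contextlib

@contextlib.contextmanager
def _tag(log, name):
    log.append(b'enter-' + name)
    yield
    log.append(b'exit-' + name)

log = []
with nested(_tag(log, b'a'), _tag(log, b'b')):
    log.append(b'body')
assert log == [b'enter-a', b'enter-b', b'body', b'exit-b', b'exit-a']
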
771 def consumev2(repo, fp, filecount, filesize):
775 def consumev2(repo, fp, filecount, filesize):
772 """Apply the contents from a version 2 streaming clone.
776 """Apply the contents from a version 2 streaming clone.
773
777
774 Data is read from an object that only needs to provide a ``read(size)``
778 Data is read from an object that only needs to provide a ``read(size)``
775 method.
779 method.
776 """
780 """
777 with repo.lock():
781 with repo.lock():
778 repo.ui.status(
782 repo.ui.status(
779 _(b'%d files to transfer, %s of data\n')
783 _(b'%d files to transfer, %s of data\n')
780 % (filecount, util.bytecount(filesize))
784 % (filecount, util.bytecount(filesize))
781 )
785 )
782
786
783 start = util.timer()
787 start = util.timer()
784 progress = repo.ui.makeprogress(
788 progress = repo.ui.makeprogress(
785 _(b'clone'), total=filesize, unit=_(b'bytes')
789 _(b'clone'), total=filesize, unit=_(b'bytes')
786 )
790 )
787 progress.update(0)
791 progress.update(0)
788
792
789 vfsmap = _makemap(repo)
793 vfsmap = _makemap(repo)
790 # we keep repo.vfs out of the map on purpose, there are too many dangers
794 # we keep repo.vfs out of the map on purpose, there are too many dangers
791 # there (eg: .hg/hgrc),
795 # there (eg: .hg/hgrc),
792 #
796 #
793 # this assert is duplicated (from _makemap) as an author might think this
797 # this assert is duplicated (from _makemap) as an author might think this
794 # is fine, while this is really not fine.
798 # is fine, while this is really not fine.
795 if repo.vfs in vfsmap.values():
799 if repo.vfs in vfsmap.values():
796 raise error.ProgrammingError(
800 raise error.ProgrammingError(
797 b'repo.vfs must not be added to vfsmap for security reasons'
801 b'repo.vfs must not be added to vfsmap for security reasons'
798 )
802 )
799
803
800 with repo.transaction(b'clone'):
804 with repo.transaction(b'clone'):
801 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
805 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
802 with nested(*ctxs):
806 with nested(*ctxs):
803 for i in range(filecount):
807 for i in range(filecount):
804 src = util.readexactly(fp, 1)
808 src = util.readexactly(fp, 1)
805 vfs = vfsmap[src]
809 vfs = vfsmap[src]
806 namelen = util.uvarintdecodestream(fp)
810 namelen = util.uvarintdecodestream(fp)
807 datalen = util.uvarintdecodestream(fp)
811 datalen = util.uvarintdecodestream(fp)
808
812
809 name = util.readexactly(fp, namelen)
813 name = util.readexactly(fp, namelen)
810
814
811 if repo.ui.debugflag:
815 if repo.ui.debugflag:
812 repo.ui.debug(
816 repo.ui.debug(
813 b'adding [%s] %s (%s)\n'
817 b'adding [%s] %s (%s)\n'
814 % (src, name, util.bytecount(datalen))
818 % (src, name, util.bytecount(datalen))
815 )
819 )
816
820
817 with vfs(name, b'w') as ofp:
821 with vfs(name, b'w') as ofp:
818 for chunk in util.filechunkiter(fp, limit=datalen):
822 for chunk in util.filechunkiter(fp, limit=datalen):
819 progress.increment(step=len(chunk))
823 progress.increment(step=len(chunk))
820 ofp.write(chunk)
824 ofp.write(chunk)
821
825
822 # force @filecache properties to be reloaded from
826 # force @filecache properties to be reloaded from
823 # streamclone-ed file at next access
827 # streamclone-ed file at next access
824 repo.invalidate(clearfilecache=True)
828 repo.invalidate(clearfilecache=True)
825
829
826 elapsed = util.timer() - start
830 elapsed = util.timer() - start
827 if elapsed <= 0:
831 if elapsed <= 0:
828 elapsed = 0.001
832 elapsed = 0.001
829 repo.ui.status(
833 repo.ui.status(
830 _(b'transferred %s in %.1f seconds (%s/sec)\n')
834 _(b'transferred %s in %.1f seconds (%s/sec)\n')
831 % (
835 % (
832 util.bytecount(progress.pos),
836 util.bytecount(progress.pos),
833 elapsed,
837 elapsed,
834 util.bytecount(progress.pos / elapsed),
838 util.bytecount(progress.pos / elapsed),
835 )
839 )
836 )
840 )
837 progress.complete()
841 progress.complete()
838
842
839
843
840 def applybundlev2(repo, fp, filecount, filesize, requirements):
844 def applybundlev2(repo, fp, filecount, filesize, requirements):
841 from . import localrepo
845 from . import localrepo
842
846
843 missingreqs = [r for r in requirements if r not in repo.supported]
847 missingreqs = [r for r in requirements if r not in repo.supported]
844 if missingreqs:
848 if missingreqs:
845 raise error.Abort(
849 raise error.Abort(
846 _(b'unable to apply stream clone: unsupported format: %s')
850 _(b'unable to apply stream clone: unsupported format: %s')
847 % b', '.join(sorted(missingreqs))
851 % b', '.join(sorted(missingreqs))
848 )
852 )
849
853
850 consumev2(repo, fp, filecount, filesize)
854 consumev2(repo, fp, filecount, filesize)
851
855
852 repo.requirements = new_stream_clone_requirements(
856 repo.requirements = new_stream_clone_requirements(
853 repo.requirements,
857 repo.requirements,
854 requirements,
858 requirements,
855 )
859 )
856 repo.svfs.options = localrepo.resolvestorevfsoptions(
860 repo.svfs.options = localrepo.resolvestorevfsoptions(
857 repo.ui, repo.requirements, repo.features
861 repo.ui, repo.requirements, repo.features
858 )
862 )
859 scmutil.writereporequirements(repo)
863 scmutil.writereporequirements(repo)
860 nodemap.post_stream_cleanup(repo)
864 nodemap.post_stream_cleanup(repo)
861
865
862
866
863 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
867 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
864 hardlink = [True]
868 hardlink = [True]
865
869
866 def copy_used():
870 def copy_used():
867 hardlink[0] = False
871 hardlink[0] = False
868 progress.topic = _(b'copying')
872 progress.topic = _(b'copying')
869
873
870 for k, path in entries:
874 for k, path in entries:
871 src_vfs = src_vfs_map[k]
875 src_vfs = src_vfs_map[k]
872 dst_vfs = dst_vfs_map[k]
876 dst_vfs = dst_vfs_map[k]
873 src_path = src_vfs.join(path)
877 src_path = src_vfs.join(path)
874 dst_path = dst_vfs.join(path)
878 dst_path = dst_vfs.join(path)
875 # We cannot use dirname and makedirs of dst_vfs here because the store
879 # We cannot use dirname and makedirs of dst_vfs here because the store
876 # encoding confuses them. See issue 6581 for details.
880 # encoding confuses them. See issue 6581 for details.
877 dirname = os.path.dirname(dst_path)
881 dirname = os.path.dirname(dst_path)
878 if not os.path.exists(dirname):
882 if not os.path.exists(dirname):
879 util.makedirs(dirname)
883 util.makedirs(dirname)
880 dst_vfs.register_file(path)
884 dst_vfs.register_file(path)
881 # XXX we could use the #nb_bytes argument.
885 # XXX we could use the #nb_bytes argument.
882 util.copyfile(
886 util.copyfile(
883 src_path,
887 src_path,
884 dst_path,
888 dst_path,
885 hardlink=hardlink[0],
889 hardlink=hardlink[0],
886 no_hardlink_cb=copy_used,
890 no_hardlink_cb=copy_used,
887 check_fs_hardlink=False,
891 check_fs_hardlink=False,
888 )
892 )
889 progress.increment()
893 progress.increment()
890 return hardlink[0]
894 return hardlink[0]
891
895
892
896
893 def local_copy(src_repo, dest_repo):
897 def local_copy(src_repo, dest_repo):
894 """copy all content from one local repository to another
898 """copy all content from one local repository to another
895
899
896 This is useful for local clone"""
900 This is useful for local clone"""
897 src_store_requirements = {
901 src_store_requirements = {
898 r
902 r
899 for r in src_repo.requirements
903 for r in src_repo.requirements
900 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
904 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
901 }
905 }
902 dest_store_requirements = {
906 dest_store_requirements = {
903 r
907 r
904 for r in dest_repo.requirements
908 for r in dest_repo.requirements
905 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
909 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
906 }
910 }
907 assert src_store_requirements == dest_store_requirements
911 assert src_store_requirements == dest_store_requirements
908
912
909 with dest_repo.lock():
913 with dest_repo.lock():
910 with src_repo.lock():
914 with src_repo.lock():
911
915
912 # bookmarks are not integrated into the streaming as they might use the
916 # bookmarks are not integrated into the streaming as they might use the
913 # `repo.vfs` and there is too much sensitive data accessible
917 # `repo.vfs` and there is too much sensitive data accessible
914 # through `repo.vfs` to expose it to streaming clone.
918 # through `repo.vfs` to expose it to streaming clone.
915 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
919 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
916 srcbookmarks = src_book_vfs.join(b'bookmarks')
920 srcbookmarks = src_book_vfs.join(b'bookmarks')
917 bm_count = 0
921 bm_count = 0
918 if os.path.exists(srcbookmarks):
922 if os.path.exists(srcbookmarks):
919 bm_count = 1
923 bm_count = 1
920
924
921 entries = _entries_walk(
925 entries = _entries_walk(
922 src_repo,
926 src_repo,
923 includes=None,
927 includes=None,
924 excludes=None,
928 excludes=None,
925 includeobsmarkers=True,
929 includeobsmarkers=True,
926 )
930 )
927 entries = list(entries)
931 entries = list(entries)
928 src_vfs_map = _makemap(src_repo)
932 src_vfs_map = _makemap(src_repo)
929 dest_vfs_map = _makemap(dest_repo)
933 dest_vfs_map = _makemap(dest_repo)
930 total_files = sum(len(e[1].files()) for e in entries) + bm_count
934 total_files = sum(len(e[1].files()) for e in entries) + bm_count
931 progress = src_repo.ui.makeprogress(
935 progress = src_repo.ui.makeprogress(
932 topic=_(b'linking'),
936 topic=_(b'linking'),
933 total=total_files,
937 total=total_files,
934 unit=_(b'files'),
938 unit=_(b'files'),
935 )
939 )
936 # copy files
940 # copy files
937 #
941 #
938 # We could copy the full file while the source repository is locked
942 # We could copy the full file while the source repository is locked
939 # and the other one without the lock. However, in the linking case,
943 # and the other one without the lock. However, in the linking case,
940 # this would also require checks that nobody is appending any data
944 # this would also require checks that nobody is appending any data
941 # to the files while we do the clone, so this is not done yet. We
945 # to the files while we do the clone, so this is not done yet. We
942 # could do this blindly when copying files.
946 # could do this blindly when copying files.
943 files = [
947 files = [
944 (vfs_key, f.unencoded_path)
948 (vfs_key, f.unencoded_path)
945 for vfs_key, e in entries
949 for vfs_key, e in entries
946 for f in e.files()
950 for f in e.files()
947 ]
951 ]
948 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
952 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
949
953
950 # copy bookmarks over
954 # copy bookmarks over
951 if bm_count:
955 if bm_count:
952 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
956 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
953 dstbookmarks = dst_book_vfs.join(b'bookmarks')
957 dstbookmarks = dst_book_vfs.join(b'bookmarks')
954 util.copyfile(srcbookmarks, dstbookmarks)
958 util.copyfile(srcbookmarks, dstbookmarks)
955 progress.complete()
959 progress.complete()
956 if hardlink:
960 if hardlink:
957 msg = b'linked %d files\n'
961 msg = b'linked %d files\n'
958 else:
962 else:
959 msg = b'copied %d files\n'
963 msg = b'copied %d files\n'
960 src_repo.ui.debug(msg % total_files)
964 src_repo.ui.debug(msg % total_files)
961
965
962 with dest_repo.transaction(b"localclone") as tr:
966 with dest_repo.transaction(b"localclone") as tr:
963 dest_repo.store.write(tr)
967 dest_repo.store.write(tr)
964
968
965 # clean up transaction files as they do not make sense
969 # clean up transaction files as they do not make sense
966 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
970 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
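A note on the `hardlink` flag returned by `_copy_files` above: the local
clone tries to hard-link store files and falls back to copying, then
reports which of the two it did. A minimal, self-contained sketch of that
hardlink-then-copy pattern (illustrative only, not Mercurial's
`_copy_files` implementation; the function name is made up):

import os
import shutil

def link_or_copy(src_path, dst_path, try_hardlink=True):
    """Hard-link src_path to dst_path when possible, else copy it.

    Returns True when a hard link was created, False when the bytes
    were copied -- the same kind of boolean the caller above uses to
    pick between the 'linked %d files' and 'copied %d files' messages.
    """
    if try_hardlink:
        try:
            # fast path: same filesystem, no data duplication
            os.link(src_path, dst_path)
            return True
        except OSError:
            # cross-device link, missing permission, or a filesystem
            # without hardlink support: fall back to copying
            pass
    shutil.copy2(src_path, dst_path)
    return False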
@@ -1,150 +1,150 b''
Test stream cloning while a revlog split happens
------------------------------------------------

#testcases stream-bundle2-v2 stream-bundle2-v3

#if stream-bundle2-v3
  $ cat << EOF >> $HGRCPATH
  > [experimental]
  > stream-v3 = yes
  > EOF
#endif

setup a repository for tests
----------------------------

  $ cat >> $HGRCPATH << EOF
  > [format]
  > # skip compression to make it easy to trigger a split
  > revlog-compression=none
  > EOF

  $ hg init server
  $ cd server
  $ file="some-file"
  $ printf '%20d' '1' > $file
  $ hg commit -Aqma
  $ printf '%1024d' '1' > $file
  $ hg commit -Aqmb
  $ printf '%20d' '1' > $file
  $ hg commit -Aqmc

check the revlog is inline

  $ f -s .hg/store/data/some-file*
  .hg/store/data/some-file.i: size=1259
  $ hg debug-revlog-index some-file
  rev linkrev nodeid p1-nodeid p2-nodeid
  0 0 ed70cecbc103 000000000000 000000000000
  1 1 7241018db64c ed70cecbc103 000000000000
  2 2 fa1120531cc1 7241018db64c 000000000000
  $ cd ..

setup synchronisation file

  $ HG_TEST_STREAM_WALKED_FILE_1="$TESTTMP/sync_file_walked_1"
  $ export HG_TEST_STREAM_WALKED_FILE_1
  $ HG_TEST_STREAM_WALKED_FILE_2="$TESTTMP/sync_file_walked_2"
  $ export HG_TEST_STREAM_WALKED_FILE_2
  $ HG_TEST_STREAM_WALKED_FILE_3="$TESTTMP/sync_file_walked_3"
  $ export HG_TEST_STREAM_WALKED_FILE_3
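The three HG_TEST_STREAM_WALKED_FILE_* variables implement a simple
file-based handshake between the server, the test script, and the
client. A rough sketch of the idea behind
$RUNTESTDIR/testlib/wait-on-file (the helper's actual implementation may
differ; this only shows the concept):

import os
import time

def wait_on_file(path, timeout=10.0, poll=0.01):
    """Block until `path` exists or raise after `timeout` seconds."""
    deadline = time.monotonic() + timeout
    while not os.path.exists(path):
        if time.monotonic() > deadline:
            raise TimeoutError('timed out waiting for %s' % path)
        time.sleep(poll)

def signal_step(path):
    """Create the marker file: the Python equivalent of `touch path`."""
    with open(path, 'a'):
        pass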
Test stream-clone raced by a revlog-split
=========================================

Test stream-clone where the file is split right after the lock section is done

Start the server

  $ hg serve -R server \
  >    -p $HGPORT1 -d --error errors.log --pid-file=hg.pid \
  >    --config extensions.stream_steps="$RUNTESTDIR/testlib/ext-stream-clone-steps.py"
  $ cat hg.pid >> $DAEMON_PIDS
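The `stream_steps` extension is what makes the race reproducible: it
pauses the server between "collect the list of files to stream" and
"actually send their bytes". A heavily hedged sketch of how such an
extension could be wired up; the real ext-stream-clone-steps.py may hook
different functions, and `_entries_walk` is used here only because it
appears in the code above:

import os
import time

from mercurial import extensions, streamclone

def _paused_walk(orig, *args, **kwargs):
    # run the real store walk while the server still holds the lock
    entries = list(orig(*args, **kwargs))
    # step 1: tell the test that data collection is done
    with open(os.environ['HG_TEST_STREAM_WALKED_FILE_1'], 'a'):
        pass
    # step 2: block until the test has triggered the revlog split
    while not os.path.exists(os.environ['HG_TEST_STREAM_WALKED_FILE_2']):
        time.sleep(0.01)
    return iter(entries)

def extsetup(ui):
    extensions.wrapfunction(streamclone, '_entries_walk', _paused_walk)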
Start a client doing a streaming clone

  $ (hg clone -q --stream -U http://localhost:$HGPORT1 clone-while-split > client.log 2>&1; touch "$HG_TEST_STREAM_WALKED_FILE_3") &

Wait for the server to be done collecting data

  $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_1

trigger a split

  $ dd if=/dev/zero of=server/$file bs=1k count=128 > /dev/null 2>&1
  $ hg -R server ci -m "triggering a split" --config ui.timeout.warn=-1
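Why 128 KiB of zeros is enough to force a split: a revlog stays "inline"
(index and data interleaved in the .i file) only while its data remains
small; once it grows past the inline threshold (131072 bytes in recent
versions of mercurial/revlog.py, exposed as revlog._maxinline), the next
append moves the data out into a separate .d file. A toy illustration of
that condition:

# 131072 matches revlog._maxinline in recent Mercurial releases; treat
# the exact value as an implementation detail that may change.
INLINE_THRESHOLD = 131072

def append_splits_revlog(current_inline_size, incoming_size):
    """True when appending `incoming_size` bytes should convert an
    inline revlog into a split .i + .d pair."""
    return current_inline_size + incoming_size > INLINE_THRESHOLD

# the test's some-file.i is 1259 bytes; 128 KiB of uncompressible zeros
# (revlog-compression=none) pushes it well past the threshold
assert append_splits_revlog(1259, 128 * 1024)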
unlock the stream generation

  $ touch $HG_TEST_STREAM_WALKED_FILE_2

wait for the client to be done cloning.

  $ $RUNTESTDIR/testlib/wait-on-file 10 $HG_TEST_STREAM_WALKED_FILE_3

Check everything is fine

  $ cat client.log
-  remote: abort: unexpected error: clone could only read 256 bytes from data/some-file.i, but expected 1259 bytes (known-bad-output !)
+  remote: abort: unexpected error: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !)
  abort: pull failed on remote (known-bad-output !)
  $ tail -2 errors.log
-  mercurial.error.Abort: clone could only read 256 bytes from data/some-file.i, but expected 1259 bytes (known-bad-output !)
+  mercurial.error.Abort: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !)
  (known-bad-output !)
  $ hg -R clone-while-split verify
  checking changesets (missing-correct-output !)
  checking manifests (missing-correct-output !)
  crosschecking files in changesets and manifests (missing-correct-output !)
  checking files (missing-correct-output !)
  checking dirstate (missing-correct-output !)
  checked 3 changesets with 3 changes to 1 files (missing-correct-output !)
  abort: repository clone-while-split not found (known-bad-output !)
  [255]
  $ hg -R clone-while-split tip
  changeset:   2:dbd9854c38a6 (missing-correct-output !)
  tag:         tip (missing-correct-output !)
  user:        test (missing-correct-output !)
  date:        Thu Jan 01 00:00:00 1970 +0000 (missing-correct-output !)
  summary:     c (missing-correct-output !)
   (missing-correct-output !)
  abort: repository clone-while-split not found (known-bad-output !)
  [255]
  $ hg -R clone-while-split debug-revlog-index some-file
  rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !)
  0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !)
  1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !)
  2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !)
  abort: repository clone-while-split not found (known-bad-output !)
  [255]

subsequent pull work

  $ hg -R clone-while-split pull
  pulling from http://localhost:$HGPORT1/ (missing-correct-output !)
  searching for changes (missing-correct-output !)
  adding changesets (missing-correct-output !)
  adding manifests (missing-correct-output !)
  adding file changes (missing-correct-output !)
  added 1 changesets with 1 changes to 1 files (missing-correct-output !)
  new changesets df05c6cb1406 (missing-correct-output !)
  (run 'hg update' to get a working copy) (missing-correct-output !)
  abort: repository clone-while-split not found (known-bad-output !)
  [255]

  $ hg -R clone-while-split debug-revlog-index some-file
  rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !)
  0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !)
  1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !)
  2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !)
  3 3 a631378adaa3 fa1120531cc1 000000000000 (missing-correct-output !)
  abort: repository clone-while-split not found (known-bad-output !)
  [255]
  $ hg -R clone-while-split verify
  checking changesets (missing-correct-output !)
  checking manifests (missing-correct-output !)
  crosschecking files in changesets and manifests (missing-correct-output !)
  checking files (missing-correct-output !)
  checking dirstate (missing-correct-output !)
  checked 4 changesets with 4 changes to 1 files (missing-correct-output !)
  abort: repository clone-while-split not found (known-bad-output !)
  [255]
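The failure mode documented above boils down to a time-of-check/time-of-use
race: the server sizes each file while holding the lock, releases the lock,
and only then streams the bytes, so a split in between leaves some-file.i
shorter than announced. A schematic reproduction of that consistency check
(illustrative names, not the streamclone API):

def stream_one_file(path, announced_size):
    """Read exactly `announced_size` bytes, aborting on a short read.

    This mirrors the 'clone could only read N bytes ... but expected
    M bytes' abort seen in client.log when a revlog split shrinks the
    .i file between size collection and streaming.
    """
    with open(path, 'rb') as fh:
        data = fh.read(announced_size)
    if len(data) != announced_size:
        raise RuntimeError(
            'clone could only read %d bytes from %s, but expected %d bytes'
            % (len(data), path, announced_size))
    return data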