revlog: make `clearcaches()` signature consistent with ManifestRevlog...
Matt Harbison
r52765:5e79783d default
@@ -1,2144 +1,2144 @@
# repository.py - Interfaces and base classes for repositories and peers.
# coding: utf-8
#
# Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import annotations

from ..i18n import _
from .. import error
from . import util as interfaceutil

# Local repository feature string.

# Revlogs are being used for file storage.
REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
# The storage part of the repository is shared from an external source.
REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
# LFS supported for backing file storage.
REPO_FEATURE_LFS = b'lfs'
# Repository supports being stream cloned.
REPO_FEATURE_STREAM_CLONE = b'streamclone'
# Repository supports (at least) some sidedata to be stored.
REPO_FEATURE_SIDE_DATA = b'side-data'
# File storage may lack data for all ancestors.
REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'

REVISION_FLAG_CENSORED = 1 << 15
REVISION_FLAG_ELLIPSIS = 1 << 14
REVISION_FLAG_EXTSTORED = 1 << 13
REVISION_FLAG_HASCOPIESINFO = 1 << 12

REVISION_FLAGS_KNOWN = (
    REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO
)
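
# Illustrative sketch (not part of the upstream file): storage consumers
# typically mask incoming flags against REVISION_FLAGS_KNOWN and reject the
# remainder before doing any per-flag processing. ``flags`` and the use of
# ``error.StorageError`` here are assumptions for illustration.
#
#     flags = REVISION_FLAG_CENSORED | REVISION_FLAG_HASCOPIESINFO
#     if flags & ~REVISION_FLAGS_KNOWN:
#         raise error.StorageError(b'unknown revision flags')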

CG_DELTAMODE_STD = b'default'
CG_DELTAMODE_PREV = b'previous'
CG_DELTAMODE_FULL = b'fulltext'
CG_DELTAMODE_P1 = b'p1'


## Cache related constants:
#
# Used to control which cache should be warmed in a repo.updatecaches(…) call.

# Warm branchmaps of all known repoview filter-levels
CACHE_BRANCHMAP_ALL = b"branchmap-all"
# Warm branchmaps of the repoview filter-level used by servers
CACHE_BRANCHMAP_SERVED = b"branchmap-served"
# Warm internal changelog cache (eg: persistent nodemap)
CACHE_CHANGELOG_CACHE = b"changelog-cache"
# Check whether a branchmap can use the "pure topo" mode
CACHE_BRANCHMAP_DETECT_PURE_TOPO = b"branchmap-detect-pure-topo"
# Warm full manifest cache
CACHE_FULL_MANIFEST = b"full-manifest"
# Warm file-node-tags cache
CACHE_FILE_NODE_TAGS = b"file-node-tags"
# Warm internal manifestlog cache (eg: persistent nodemap)
CACHE_MANIFESTLOG_CACHE = b"manifestlog-cache"
# Warm rev branch cache
CACHE_REV_BRANCH = b"rev-branch-cache"
# Warm tags' cache for the default repoview
CACHE_TAGS_DEFAULT = b"tags-default"
# Warm tags' cache for the repoview filter-level used by servers
CACHE_TAGS_SERVED = b"tags-served"

# the caches to warm by default after a simple transaction
# (this is a mutable set to let extensions update it)
CACHES_DEFAULT = {
    CACHE_BRANCHMAP_SERVED,
}

# the caches to warm when warming all of them
# (this is a mutable set to let extensions update it)
CACHES_ALL = {
    CACHE_BRANCHMAP_SERVED,
    CACHE_BRANCHMAP_ALL,
    CACHE_BRANCHMAP_DETECT_PURE_TOPO,
    CACHE_CHANGELOG_CACHE,
    CACHE_FILE_NODE_TAGS,
    CACHE_FULL_MANIFEST,
    CACHE_MANIFESTLOG_CACHE,
    CACHE_TAGS_DEFAULT,
    CACHE_TAGS_SERVED,
}

# the caches to warm by default after a clone
# (this is a mutable set to let extensions update it)
CACHES_POST_CLONE = CACHES_ALL.copy()
CACHES_POST_CLONE.discard(CACHE_FILE_NODE_TAGS)
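
# Illustrative sketch (not part of the upstream file): because these are
# mutable sets, an extension can opt one of its own caches into default
# warming. ``CACHE_MY_EXTENSION`` is a hypothetical key.
#
#     CACHE_MY_EXTENSION = b"my-extension-cache"
#     CACHES_DEFAULT.add(CACHE_MY_EXTENSION)
#     CACHES_ALL.add(CACHE_MY_EXTENSION)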


class ipeerconnection(interfaceutil.Interface):
    """Represents a "connection" to a repository.

    This is the base interface for representing a connection to a repository.
    It holds basic properties and methods applicable to all peer types.

    This is not a complete interface definition and should not be used
    outside of this module.
    """

    ui = interfaceutil.Attribute("""ui.ui instance""")
    path = interfaceutil.Attribute("""a urlutil.path instance or None""")

    def url():
        """Returns a URL string representing this peer.

        Currently, implementations expose the raw URL used to construct the
        instance. It may contain credentials as part of the URL. The
        expectations of the value aren't well-defined and this could lead to
        data leakage.

        TODO audit/clean consumers and more clearly define the contents of this
        value.
        """

    def local():
        """Returns a local repository instance.

        If the peer represents a local repository, returns an object that
        can be used to interface with it. Otherwise returns ``None``.
        """

    def canpush():
        """Returns a boolean indicating if this peer can be pushed to."""

    def close():
        """Close the connection to this peer.

        This is called when the peer will no longer be used. Resources
        associated with the peer should be cleaned up.
        """


class ipeercapabilities(interfaceutil.Interface):
    """Peer sub-interface related to capabilities."""

    def capable(name):
        """Determine support for a named capability.

        Returns ``False`` if capability not supported.

        Returns ``True`` if boolean capability is supported. Returns a string
        if capability support is non-boolean.

        Capability strings may or may not map to wire protocol capabilities.
        """

    def requirecap(name, purpose):
        """Require a capability to be present.

        Raises a ``CapabilityError`` if the capability isn't present.
        """


class ipeercommands(interfaceutil.Interface):
    """Client-side interface for communicating over the wire protocol.

    This interface is used as a gateway to the Mercurial wire protocol.
    Methods commonly call wire protocol commands of the same name.
    """

    def branchmap():
        """Obtain heads in named branches.

        Returns a dict mapping branch name to an iterable of nodes that are
        heads on that branch.
        """

    def capabilities():
        """Obtain capabilities of the peer.

        Returns a set of string capabilities.
        """

    def get_cached_bundle_inline(path):
        """Retrieve a clonebundle across the wire.

        Returns a chunkbuffer.
        """

    def clonebundles():
        """Obtains the clone bundles manifest for the repo.

        Returns the manifest as unparsed bytes.
        """

    def debugwireargs(one, two, three=None, four=None, five=None):
        """Used to facilitate debugging of arguments passed over the wire."""

    def getbundle(source, **kwargs):
        """Obtain remote repository data as a bundle.

        This command is how the bulk of repository data is transferred from
        the peer to the local repository.

        Returns a generator of bundle data.
        """

    def heads():
        """Determine all known head revisions in the peer.

        Returns an iterable of binary nodes.
        """

    def known(nodes):
        """Determine whether multiple nodes are known.

        Accepts an iterable of nodes whose presence to check for.

        Returns an iterable of booleans indicating whether the corresponding
        node at that index is known to the peer.
        """

    def listkeys(namespace):
        """Obtain all keys in a pushkey namespace.

        Returns an iterable of key names.
        """

    def lookup(key):
        """Resolve a value to a known revision.

        Returns a binary node of the resolved revision on success.
        """

    def pushkey(namespace, key, old, new):
        """Set a value using the ``pushkey`` protocol.

        Arguments correspond to the pushkey namespace and key to operate on and
        the old and new values for that key.

        Returns a string with the peer result. The value inside varies by the
        namespace.
        """

    def stream_out():
        """Obtain streaming clone data.

        Successful result should be a generator of data chunks.
        """

    def unbundle(bundle, heads, url):
        """Transfer repository data to the peer.

        This is how the bulk of data during a push is transferred.

        Returns the integer number of heads added to the peer.
        """


class ipeerlegacycommands(interfaceutil.Interface):
    """Interface for implementing support for legacy wire protocol commands.

    Wire protocol commands transition to legacy status when they are no longer
    used by modern clients. To facilitate identifying which commands are
    legacy, the interfaces are split.
    """

    def between(pairs):
        """Obtain nodes between pairs of nodes.

        ``pairs`` is an iterable of node pairs.

        Returns an iterable of iterables of nodes corresponding to each
        requested pair.
        """

    def branches(nodes):
        """Obtain ancestor changesets of specific nodes back to a branch point.

        For each requested node, the peer finds the first ancestor node that is
        a DAG root or is a merge.

        Returns an iterable of iterables with the resolved values for each node.
        """

    def changegroup(nodes, source):
        """Obtain a changegroup with data for descendants of specified nodes."""

    def changegroupsubset(bases, heads, source):
        pass


class ipeercommandexecutor(interfaceutil.Interface):
    """Represents a mechanism to execute remote commands.

    This is the primary interface for requesting that wire protocol commands
    be executed. Instances of this interface are active in a context manager
    and have a well-defined lifetime. When the context manager exits, all
    outstanding requests are waited on.
    """

    def callcommand(name, args):
        """Request that a named command be executed.

        Receives the command name and a dictionary of command arguments.

        Returns a ``concurrent.futures.Future`` that will resolve to the
        result of that command request. That exact value is left up to
        the implementation and possibly varies by command.

        Not all commands can coexist with other commands in an executor
        instance: it depends on the underlying wire protocol transport being
        used and the command itself.

        Implementations MAY call ``sendcommands()`` automatically if the
        requested command can not coexist with other commands in this executor.

        Implementations MAY call ``sendcommands()`` automatically when the
        future's ``result()`` is called. So, consumers using multiple
        commands with an executor MUST ensure that ``result()`` is not called
        until all command requests have been issued.
        """

    def sendcommands():
        """Trigger submission of queued command requests.

        Not all transports submit commands as soon as they are requested to
        run. When called, this method forces queued command requests to be
        issued. It will no-op if all commands have already been sent.

        When called, no more new commands may be issued with this executor.
        """

    def close():
        """Signal that this command request is finished.

        When called, no more new commands may be issued. All outstanding
        commands that have previously been issued are waited on before
        returning. This not only includes waiting for the futures to resolve,
        but also waiting for all response data to arrive. In other words,
        calling this waits for all on-wire state for issued command requests
        to finish.

        When used as a context manager, this method is called when exiting the
        context manager.

        This method may call ``sendcommands()`` if there are buffered commands.
        """


class ipeerrequests(interfaceutil.Interface):
    """Interface for executing commands on a peer."""

    limitedarguments = interfaceutil.Attribute(
        """True if the peer cannot receive large argument value for commands."""
    )

    def commandexecutor():
        """A context manager that resolves to an ipeercommandexecutor.

        The object this resolves to can be used to issue command requests
        to the peer.

        Callers should call its ``callcommand`` method to issue command
        requests.

        A new executor should be obtained for each distinct set of commands
        (possibly just a single command) that the consumer wants to execute
        as part of a single operation or round trip. This is because some
        peers are half-duplex and/or don't support persistent connections;
        e.g. in the case of HTTP peers, commands sent to an executor represent
        a single HTTP request. While some peers may support multiple command
        sends over the wire per executor, consumers need to code to the least
        capable peer. So it should be assumed that command executors buffer
        called commands until they are told to send them and that each
        command executor could result in a new connection or wire-level request
        being issued.
        """


class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
    """Unified interface for peer repositories.

    All peer instances must conform to this interface.
    """


class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
    """Unified peer interface for wire protocol version 2 peers."""

    apidescriptor = interfaceutil.Attribute(
        """Data structure holding description of server API."""
    )


@interfaceutil.implementer(ipeerbase)
class peer:
    """Base class for peer repositories."""

    limitedarguments = False

    def __init__(self, ui, path=None, remotehidden=False):
        self.ui = ui
        self.path = path

    def capable(self, name):
        caps = self.capabilities()
        if name in caps:
            return True

        name = b'%s=' % name
        for cap in caps:
            if cap.startswith(name):
                return cap[len(name) :]

        return False

    def requirecap(self, name, purpose):
        if self.capable(name):
            return

        raise error.CapabilityError(
            _(
                b'cannot %s; remote repository does not support the '
                b'\'%s\' capability'
            )
            % (purpose, name)
        )
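
    # Illustrative sketch (not part of the upstream file): how the
    # ``capable()`` return convention is typically consumed. ``remote`` is
    # a hypothetical peer instance.
    #
    #     if remote.capable(b'getbundle'):     # boolean capability -> True
    #         ...
    #     val = remote.capable(b'bundle2')     # non-boolean -> the bytes
    #                                          # after b'bundle2=' in the
    #                                          # caps set, or False if absent
    #     remote.requirecap(b'getbundle', b'look up remote changes')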


class iverifyproblem(interfaceutil.Interface):
    """Represents a problem with the integrity of the repository.

    Instances of this interface are emitted to describe an integrity issue
    with a repository (e.g. corrupt storage, missing data, etc).

    Instances are essentially messages associated with severity.
    """

    warning = interfaceutil.Attribute(
        """Message indicating a non-fatal problem."""
    )

    error = interfaceutil.Attribute("""Message indicating a fatal problem.""")

    node = interfaceutil.Attribute(
        """Revision encountering the problem.

        ``None`` means the problem doesn't apply to a single revision.
        """
    )


class irevisiondelta(interfaceutil.Interface):
    """Represents a delta between one revision and another.

    Instances convey enough information to allow a revision to be exchanged
    with another repository.

    Instances represent the fulltext revision data or a delta against
    another revision. Therefore the ``revision`` and ``delta`` attributes
    are mutually exclusive.

    Typically used for changegroup generation.
    """

    node = interfaceutil.Attribute("""20 byte node of this revision.""")

    p1node = interfaceutil.Attribute(
        """20 byte node of 1st parent of this revision."""
    )

    p2node = interfaceutil.Attribute(
        """20 byte node of 2nd parent of this revision."""
    )

    linknode = interfaceutil.Attribute(
        """20 byte node of the changelog revision this node is linked to."""
    )

    flags = interfaceutil.Attribute(
        """2 bytes of integer flags that apply to this revision.

        This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
        """
    )

    basenode = interfaceutil.Attribute(
        """20 byte node of the revision this data is a delta against.

        ``nullid`` indicates that the revision is a full revision and not
        a delta.
        """
    )

    baserevisionsize = interfaceutil.Attribute(
        """Size of base revision this delta is against.

        May be ``None`` if ``basenode`` is ``nullid``.
        """
    )

    revision = interfaceutil.Attribute(
        """Raw fulltext of revision data for this node."""
    )

    delta = interfaceutil.Attribute(
        """Delta between ``basenode`` and ``node``.

        Stored in the bdiff delta format.
        """
    )

    sidedata = interfaceutil.Attribute(
        """Raw sidedata bytes for the given revision."""
    )

    protocol_flags = interfaceutil.Attribute(
        """Single byte of integer flags that can influence the protocol.

        This is a bitwise composition of the ``storageutil.CG_FLAG*`` constants.
        """
    )


class ifilerevisionssequence(interfaceutil.Interface):
    """Contains index data for all revisions of a file.

    Types implementing this behave like lists of tuples. The index
    in the list corresponds to the revision number. The values contain
    index metadata.

    The *null* revision (revision number -1) is always the last item
    in the index.
    """

    def __len__():
        """The total number of revisions."""

    def __getitem__(rev):
        """Returns the object having a specific revision number.

        Returns an 8-tuple with the following fields:

        offset+flags
            Contains the offset and flags for the revision. 64-bit unsigned
            integer where the first 6 bytes are the offset and the next 2
            bytes are flags. The offset can be 0 if it is not used by the
            store.
        compressed size
            Size of the revision data in the store. It can be 0 if it isn't
            needed by the store.
        uncompressed size
            Fulltext size. It can be 0 if it isn't needed by the store.
        base revision
            Revision number of the revision the delta for storage is encoded
            against. -1 indicates not encoded against a base revision.
        link revision
            Revision number of changelog revision this entry is related to.
        p1 revision
            Revision number of 1st parent. -1 if no 1st parent.
        p2 revision
            Revision number of 2nd parent. -1 if no 2nd parent.
        node
            Binary node value for this revision number.

        Negative values should index off the end of the sequence. ``-1``
        should return the null revision. ``-2`` should return the most
        recent revision.
        """

    def __contains__(rev):
        """Whether a revision number exists."""

    def insert(self, i, entry):
        """Add an item to the index at a specific revision."""


class ifileindex(interfaceutil.Interface):
    """Storage interface for index data of a single file.

    File storage data is divided into index metadata and data storage.
    This interface defines the index portion of the interface.

    The index logically consists of:

    * A mapping between revision numbers and nodes.
    * DAG data (storing and querying the relationship between nodes).
    * Metadata to facilitate storage.
    """

    nullid = interfaceutil.Attribute(
        """node for the null revision for use as delta base."""
    )

    def __len__():
        """Obtain the number of revisions stored for this file."""

    def __iter__():
        """Iterate over revision numbers for this file."""

    def hasnode(node):
        """Returns a bool indicating if a node is known to this store.

        Implementations must only return True for full, binary node values:
        hex nodes, revision numbers, and partial node matches must be
        rejected.

        The null node is never present.
        """

    def revs(start=0, stop=None):
        """Iterate over revision numbers for this file, with control."""

    def parents(node):
        """Returns a 2-tuple of parent nodes for a revision.

        Values will be ``nullid`` if the parent is empty.
        """

    def parentrevs(rev):
        """Like parents() but operates on revision numbers."""

    def rev(node):
        """Obtain the revision number given a node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def node(rev):
        """Obtain the node value given a revision number.

        Raises ``IndexError`` if the node is not known.
        """

    def lookup(node):
        """Attempt to resolve a value to a node.

        Value can be a binary node, hex node, revision number, or a string
        that can be converted to an integer.

        Raises ``error.LookupError`` if a node could not be resolved.
        """

    def linkrev(rev):
        """Obtain the changeset revision number a revision is linked to."""

    def iscensored(rev):
        """Return whether a revision's content has been censored."""

    def commonancestorsheads(node1, node2):
        """Obtain an iterable of nodes containing heads of common ancestors.

        See ``ancestor.commonancestorsheads()``.
        """

    def descendants(revs):
        """Obtain descendant revision numbers for a set of revision numbers.

        If ``nullrev`` is in the set, this is equivalent to ``revs()``.
        """

    def heads(start=None, stop=None):
        """Obtain a list of nodes that are DAG heads, with control.

        The set of revisions examined can be limited by specifying
        ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
        iterable of nodes. DAG traversal starts at the earlier revision
        ``start`` and iterates forward until any node in ``stop`` is
        encountered.
        """

    def children(node):
        """Obtain nodes that are children of a node.

        Returns a list of nodes.
        """


class ifiledata(interfaceutil.Interface):
    """Storage interface for data storage of a specific file.

    This complements ``ifileindex`` and provides an interface for accessing
    data for a tracked file.
    """

    def size(rev):
        """Obtain the fulltext size of file data.

        Any metadata is excluded from size measurements.
        """

    def revision(node):
        """Obtain fulltext data for a node.

        Any storage transformations are applied before the data is returned;
        use ``rawdata()`` to obtain data without non-raw storage
        transformations applied.

        The fulltext data may contain a header containing metadata. Most
        consumers should use ``read()`` to obtain the actual file data.
        """

    def rawdata(node):
        """Obtain raw data for a node."""

    def read(node):
        """Resolve file fulltext data.

        This is similar to ``revision()`` except any metadata in the data
        headers is stripped.
        """

    def renamed(node):
        """Obtain copy metadata for a node.

        Returns ``False`` if no copy metadata is stored or a 2-tuple of
        (path, node) from which this revision was copied.
        """

    def cmp(node, fulltext):
        """Compare fulltext to another revision.

        Returns True if the fulltext is different from what is stored.

        This takes copy metadata into account.

        TODO better document the copy metadata and censoring logic.
        """

    def emitrevisions(
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=CG_DELTAMODE_STD,
    ):
        """Produce ``irevisiondelta`` for revisions.

        Given an iterable of nodes, emits objects conforming to the
        ``irevisiondelta`` interface that describe revisions in storage.

        This method is a generator.

        The input nodes may be unordered. Implementations must ensure that a
        node's parents are emitted before the node itself. Transitively, this
        means that a node may only be emitted once all its ancestors in
        ``nodes`` have also been emitted.

        By default, emits "index" data (the ``node``, ``p1node``, and
        ``p2node`` attributes). If ``revisiondata`` is set, revision data
        will also be present on the emitted objects.

        With default argument values, implementations can choose to emit
        either fulltext revision data or a delta. When emitting deltas,
        implementations must consider whether the delta's base revision
        fulltext is available to the receiver.

        The base revision fulltext is guaranteed to be available if any of
        the following are met:

        * Its fulltext revision was emitted by this method call.
        * A delta for that revision was emitted by this method call.
        * ``assumehaveparentrevisions`` is True and the base revision is a
          parent of the node.

        ``nodesorder`` can be used to control the order that revisions are
        emitted. By default, revisions can be reordered as long as they are
        in DAG topological order (see above). If the value is ``nodes``,
        the iteration order from ``nodes`` should be used. If the value is
        ``storage``, then the native order from the backing storage layer
        is used. (Not all storage layers will have strong ordering and behavior
        of this mode is storage-dependent.) ``nodes`` ordering can force
        revisions to be emitted before their ancestors, so consumers should
        use it with care.

        The ``linknode`` attribute on the returned ``irevisiondelta`` may not
        be set and it is the caller's responsibility to resolve it, if needed.

        If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
        all revision data should be emitted as deltas against the revision
        emitted just prior. The initial revision should be a delta against its
        1st parent.
        """


class ifilemutation(interfaceutil.Interface):
    """Storage interface for mutation events of a tracked file."""

    def add(filedata, meta, transaction, linkrev, p1, p2):
        """Add a new revision to the store.

        Takes file data, dictionary of metadata, a transaction, linkrev,
        and parent nodes.

        Returns the node that was added.

        May no-op if a revision matching the supplied data is already stored.
        """

    def addrevision(
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=0,
        cachedelta=None,
    ):
        """Add a new revision to the store and return its number.

        This is similar to ``add()`` except it operates at a lower level.

        The data passed in already contains a metadata header, if any.

        ``node`` and ``flags`` can be used to define the expected node and
        the flags to use with storage. ``flags`` is a bitwise value composed
        of the various ``REVISION_FLAG_*`` constants.

        ``add()`` is usually called when adding files from e.g. the working
        directory. ``addrevision()`` is often called by ``add()`` and for
        scenarios where revision data has already been computed, such as when
        applying raw data from a peer repo.
        """

    def addgroup(
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
    ):
        """Process a series of deltas for storage.

        ``deltas`` is an iterable of 7-tuples of
        (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
        to add.

        The ``delta`` field contains ``mpatch`` data to apply to a base
        revision, identified by ``deltabase``. The base node can be
        ``nullid``, in which case the header from the delta can be ignored
        and the delta used as the fulltext.

        ``alwayscache`` instructs the lower layers to cache the content of the
        newly added revision, even if it needs to be explicitly computed.
        This used to be the default when ``addrevisioncb`` was provided up to
        Mercurial 5.8.

        ``addrevisioncb`` should be called for each new rev as it is committed.
        ``duplicaterevisioncb`` should be called for all revs with a
        pre-existing node.

        ``maybemissingparents`` is a bool indicating whether the incoming
        data may reference parents/ancestor revisions that aren't present.
        This flag is set when receiving data into a "shallow" store that
        doesn't hold all history.

        Returns a list of nodes that were processed. A node will be in the list
        even if it existed in the store previously.
        """
861
861
    def censorrevision(tr, node, tombstone=b''):
        """Remove the content of a single revision.

        The specified ``node`` will have its content purged from storage.
        Future attempts to access the revision data for this node will
        result in failure.

        A ``tombstone`` message can optionally be stored. This message may be
        displayed to users when they attempt to access the missing revision
        data.

        Storage backends may have stored deltas against the previous content
        in this revision. As part of censoring a revision, these storage
        backends are expected to rewrite any internally stored deltas such
        that they no longer reference the deleted content.
        """

    def getstrippoint(minlink):
        """Find the minimum revision that must be stripped to strip a linkrev.

        Returns a 2-tuple containing the minimum revision number and a set
        of all revision numbers that would be broken by this strip.

        TODO this is highly revlog centric and should be abstracted into
        a higher-level deletion API. ``repair.strip()`` relies on this.
        """

    def strip(minlink, transaction):
        """Remove storage of items starting at a linkrev.

        This uses ``getstrippoint()`` to determine the first node to remove.
        Then it effectively truncates storage for all revisions after that.

        TODO this is highly revlog centric and should be abstracted into a
        higher-level deletion API.
        """


class ifilestorage(ifileindex, ifiledata, ifilemutation):
    """Complete storage interface for a single tracked file."""

    def files():
        """Obtain paths that are backing storage for this file.

        TODO this is used heavily by verify code and there should probably
        be a better API for that.
        """

    def storageinfo(
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        """Obtain information about storage for this file's data.

        Returns a dict describing storage for this tracked path. The keys
        in the dict map to arguments of the same name. The arguments are
        bools indicating whether to calculate and obtain that data.

        exclusivefiles
           Iterable of (vfs, path) describing files that are exclusively
           used to back storage for this tracked path.

        sharedfiles
           Iterable of (vfs, path) describing files that are used to back
           storage for this tracked path. Those files may also provide
           storage for other stored entities.

        revisionscount
           Number of revisions available for retrieval.

        trackedsize
           Total size in bytes of all tracked revisions. This is a sum of the
           length of the fulltext of all revisions.

        storedsize
           Total size in bytes used to store data for all tracked revisions.
           This is commonly less than ``trackedsize`` due to internal usage
           of deltas rather than fulltext revisions.

        Not all storage backends may support all queries or have a reasonable
        value to use. In that case, the value should be set to ``None`` and
        callers are expected to handle this special value.
        """

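    # Usage sketch (assumes ``fl`` conforms to ``ifilestorage``): request
    # only the fields needed and tolerate ``None`` for anything the backend
    # cannot compute.
    #
    #   info = fl.storageinfo(revisionscount=True, storedsize=True)
    #   if info[b'storedsize'] is not None:
    #       total_stored += info[b'storedsize']
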
    def verifyintegrity(state):
        """Verifies the integrity of file storage.

        ``state`` is a dict holding state of the verifier process. It can be
        used to communicate data between invocations of multiple storage
        primitives.

        If individual revisions cannot have their revision content resolved,
        the method is expected to set the ``skipread`` key to a set of nodes
        that encountered problems. If set, the method can also add the node(s)
        to ``safe_renamed`` in order to indicate nodes that may perform the
        rename checks with currently accessible data.

        The method yields objects conforming to the ``iverifyproblem``
        interface.
        """


class idirs(interfaceutil.Interface):
    """Interface representing a collection of directories from paths.

    This interface is essentially a derived data structure representing
    directories from a collection of paths.
    """

    def addpath(path):
        """Add a path to the collection.

        All directories in the path will be added to the collection.
        """

    def delpath(path):
        """Remove a path from the collection.

        If the removal was the last path in a particular directory, the
        directory is removed from the collection.
        """

    def __iter__():
        """Iterate over the directories in this collection of paths."""

    def __contains__(path):
        """Whether a specific directory is in this collection."""


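# A minimal reference-counting sketch of the ``idirs`` contract, included
# for illustration only (an assumption, not Mercurial's implementation;
# the real one lives elsewhere): each directory is counted so ``delpath()``
# only drops a directory once no remaining path lives under it.
class _dirssketch:
    def __init__(self):
        self._dirs = {}  # directory -> number of tracked paths beneath it

    @staticmethod
    def _finddirs(path):
        # b'a/b/c' yields the directories b'' (root), b'a' and b'a/b'.
        parts = path.split(b'/')[:-1]
        for i in range(len(parts) + 1):
            yield b'/'.join(parts[:i])

    def addpath(self, path):
        for d in self._finddirs(path):
            self._dirs[d] = self._dirs.get(d, 0) + 1

    def delpath(self, path):
        for d in self._finddirs(path):
            if self._dirs[d] == 1:
                del self._dirs[d]
            else:
                self._dirs[d] -= 1

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

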
class imanifestdict(interfaceutil.Interface):
    """Interface representing a manifest data structure.

    A manifest is effectively a dict mapping paths to entries. Each entry
    consists of a binary node and extra flags affecting that entry.
    """

    def __getitem__(path):
        """Returns the binary node value for a path in the manifest.

        Raises ``KeyError`` if the path does not exist in the manifest.

        Equivalent to ``self.find(path)[0]``.
        """

    def find(path):
        """Returns the entry for a path in the manifest.

        Returns a 2-tuple of (node, flags).

        Raises ``KeyError`` if the path does not exist in the manifest.
        """

    def __len__():
        """Return the number of entries in the manifest."""

    def __nonzero__():
        """Returns True if the manifest has entries, False otherwise."""

    __bool__ = __nonzero__

    def set(path, node, flags):
        """Define the node value and flags for a path in the manifest.

        Equivalent to __setitem__ followed by setflag, but can be more
        efficient.
        """

    def __setitem__(path, node):
        """Define the node value for a path in the manifest.

        If the path is already in the manifest, its flags will be copied to
        the new entry.
        """

    def __contains__(path):
        """Whether a path exists in the manifest."""

    def __delitem__(path):
        """Remove a path from the manifest.

        Raises ``KeyError`` if the path is not in the manifest.
        """

    def __iter__():
        """Iterate over paths in the manifest."""

    def iterkeys():
        """Iterate over paths in the manifest."""

    def keys():
        """Obtain a list of paths in the manifest."""

    def filesnotin(other, match=None):
        """Obtain the set of paths in this manifest but not in another.

        ``match`` is an optional matcher function to be applied to both
        manifests.

        Returns a set of paths.
        """

    def dirs():
        """Returns an object implementing the ``idirs`` interface."""

    def hasdir(dir):
        """Returns a bool indicating if a directory is in this manifest."""

    def walk(match):
        """Generator of paths in manifest satisfying a matcher.

        If the matcher has explicit files listed and they don't exist in
        the manifest, ``match.bad()`` is called for each missing file.
        """

    def diff(other, match=None, clean=False):
        """Find differences between this manifest and another.

        This manifest is compared to ``other``.

        If ``match`` is provided, the two manifests are filtered against this
        matcher and only entries satisfying the matcher are compared.

        If ``clean`` is True, unchanged files are included in the returned
        object.

        Returns a dict with paths as keys and values of 2-tuples of 2-tuples
        of the form ``((node1, flag1), (node2, flag2))`` where
        ``(node1, flag1)`` represents the node and flags for this manifest
        and ``(node2, flag2)`` are the same for the other manifest.
        """

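    # Sketch of consuming ``diff()`` output (assumes ``m1`` and ``m2``
    # conform to this interface); a ``None`` node on one side indicates the
    # path is absent from that manifest:
    #
    #   for path, ((n1, fl1), (n2, fl2)) in m1.diff(m2).items():
    #       if n1 is None:
    #           ...  # path only exists in ``other``
    #       elif n2 is None:
    #           ...  # path was removed in ``other``
    #       else:
    #           ...  # path changed nodes and/or flags
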
    def setflag(path, flag):
        """Set the flag value for a given path.

        Raises ``KeyError`` if the path is not already in the manifest.
        """

    def get(path, default=None):
        """Obtain the node value for a path or a default value if missing."""

    def flags(path):
        """Return the flags value for a path (default: empty bytestring)."""

    def copy():
        """Return a copy of this manifest."""

    def items():
        """Returns an iterable of (path, node) for items in this manifest."""

    def iteritems():
        """Identical to items()."""

    def iterentries():
        """Returns an iterable of (path, node, flags) for this manifest.

        Similar to ``iteritems()`` except items are a 3-tuple and include
        flags.
        """

    def text():
        """Obtain the raw data representation for this manifest.

        Result is used to create a manifest revision.
        """

    def fastdelta(base, changes):
        """Obtain a delta between this manifest and another given changes.

        ``base`` is the raw data representation for another manifest.

        ``changes`` is an iterable of ``(path, to_delete)``.

        Returns a 2-tuple containing ``bytearray(self.text())`` and the
        delta between ``base`` and this manifest.

        If this manifest implementation can't support ``fastdelta()``,
        raise ``mercurial.manifest.FastdeltaUnavailable``.
        """


class imanifestrevisionbase(interfaceutil.Interface):
    """Base interface representing a single revision of a manifest.

    Should not be used as a primary interface: should always be inherited
    as part of a larger interface.
    """

    def copy():
        """Obtain a copy of this manifest instance.

        Returns an object conforming to the ``imanifestrevisionwritable``
        interface. The instance will be associated with the same
        ``imanifestlog`` collection as this instance.
        """

    def read():
        """Obtain the parsed manifest data structure.

        The returned object conforms to the ``imanifestdict`` interface.
        """


class imanifestrevisionstored(imanifestrevisionbase):
    """Interface representing a manifest revision committed to storage."""

    def node():
        """The binary node for this manifest."""

    parents = interfaceutil.Attribute(
        """List of binary nodes that are parents for this manifest revision."""
    )

    def readdelta(shallow=False):
        """Obtain the manifest data structure representing changes from parent.

        This manifest is compared to its 1st parent. A new manifest
        representing those differences is constructed.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest,
        i.e. the subdirectory will be reported among files and distinguished
        only by its 't' flag. This only applies if the underlying manifest
        supports it.

        The returned object conforms to the ``imanifestdict`` interface.
        """

    def read_any_fast_delta(valid_bases=None, *, shallow=False):
        """Read some manifest information as fast as possible.

        This might return a "delta", a manifest object containing only the
        files changed compared to another revision. The `valid_bases`
        argument controls the set of revisions that might be used as a base.

        If no delta can be retrieved quickly, a full read of the manifest
        will be performed instead.

        The function returns a tuple with two elements. The first one is the
        delta base used (or None if we did a full read), the second one is
        the manifest information.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest,
        i.e. the subdirectory will be reported among files and distinguished
        only by its 't' flag. This only applies if the underlying manifest
        supports it.

        The returned object conforms to the ``imanifestdict`` interface.
        """
    def read_delta_parents(*, shallow=False, exact=True):
        """Return a diff from this revision against both parents.

        If `exact` is False, this might return a superset of the diff,
        containing files that are actually present as is in one of the
        parents.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest,
        i.e. the subdirectory will be reported among files and distinguished
        only by its 't' flag. This only applies if the underlying manifest
        supports it.

        The returned object conforms to the ``imanifestdict`` interface."""

    def read_delta_new_entries(*, shallow=False):
        """Return a manifest containing just the entries that might be new to
        the repository.

        This is often equivalent to a diff against both parents, but without
        guarantee. For performance reasons, it might contain more files in
        some cases.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest,
        i.e. the subdirectory will be reported among files and distinguished
        only by its 't' flag. This only applies if the underlying manifest
        supports it.

        The returned object conforms to the ``imanifestdict`` interface."""

    def readfast(shallow=False):
        """Calls either ``read()`` or ``readdelta()``.

        The faster of the two options is called.
        """

    def find(key):
        """Calls ``self.read().find(key)``.

        Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
        """


class imanifestrevisionwritable(imanifestrevisionbase):
    """Interface representing a manifest revision that can be committed."""

    def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
        """Add this revision to storage.

        Takes a transaction object, the changeset revision number it will
        be associated with, its parent nodes, and lists of added and
        removed paths.

        If match is provided, storage can choose not to inspect or write out
        items that do not match. Storage is still required to be able to
        provide the full manifest in the future for any directories written
        (these manifests should not be "narrowed on disk").

        Returns the binary node of the created revision.
        """

class imanifeststorage(interfaceutil.Interface):
    """Storage interface for manifest data."""

    nodeconstants = interfaceutil.Attribute(
        """nodeconstants used by the current repository."""
    )

    tree = interfaceutil.Attribute(
        """The path to the directory this manifest tracks.

        The empty bytestring represents the root manifest.
        """
    )

    index = interfaceutil.Attribute(
        """An ``ifilerevisionssequence`` instance."""
    )

    opener = interfaceutil.Attribute(
        """VFS opener to use to access underlying files used for storage.

        TODO this is revlog specific and should not be exposed.
        """
    )

    fulltextcache = interfaceutil.Attribute(
        """Dict with cache of fulltexts.

        TODO this doesn't feel appropriate for the storage interface.
        """
    )

    def __len__():
        """Obtain the number of revisions stored for this manifest."""

    def __iter__():
        """Iterate over revision numbers for this manifest."""

    def rev(node):
        """Obtain the revision number given a binary node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def node(rev):
        """Obtain the node value given a revision number.

        Raises ``error.LookupError`` if the revision is not known.
        """

    def lookup(value):
        """Attempt to resolve a value to a node.

        Value can be a binary node, hex node, revision number, or a bytes
        that can be converted to an integer.

        Raises ``error.LookupError`` if a node could not be resolved.
        """

    def parents(node):
        """Returns a 2-tuple of parent nodes for a node.

        Values will be ``nullid`` if the parent is empty.
        """

    def parentrevs(rev):
        """Like parents() but operates on revision numbers."""

    def linkrev(rev):
        """Obtain the changeset revision number a revision is linked to."""

    def revision(node):
        """Obtain fulltext data for a node."""

    def rawdata(node):
        """Obtain raw data for a node."""

    def revdiff(rev1, rev2):
        """Obtain a delta between two revision numbers.

        The returned data is the result of ``bdiff.bdiff()`` on the raw
        revision data.
        """

    def cmp(node, fulltext):
        """Compare fulltext to another revision.

        Returns True if the fulltext is different from what is stored.
        """

    def emitrevisions(
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
    ):
        """Produce ``irevisiondelta`` describing revisions.

        See the documentation for ``ifiledata`` for more.
        """

    def addgroup(
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """Process a series of deltas for storage.

        See the documentation in ``ifilemutation`` for more.
        """

    def rawsize(rev):
        """Obtain the size of tracked data.

        Is equivalent to ``len(m.rawdata(node))``.

        TODO this method is only used by upgrade code and may be removed.
        """

    def getstrippoint(minlink):
        """Find minimum revision that must be stripped to strip a linkrev.

        See the documentation in ``ifilemutation`` for more.
        """

    def strip(minlink, transaction):
        """Remove storage of items starting at a linkrev.

        See the documentation in ``ifilemutation`` for more.
        """

    def checksize():
        """Obtain the expected sizes of backing files.

        TODO this is used by verify and it should not be part of the
        interface.
        """

    def files():
        """Obtain paths that are backing storage for this manifest.

        TODO this is used by verify and there should probably be a better API
        for this functionality.
        """

    def deltaparent(rev):
        """Obtain the revision that a revision is delta'd against.

        TODO delta encoding is an implementation detail of storage and should
        not be exposed to the storage interface.
        """

    def clone(tr, dest, **kwargs):
        """Clone this instance to another."""

    def clearcaches(clear_persisted_data=False):
        """Clear any caches associated with this instance."""

    def dirlog(d):
        """Obtain a manifest storage instance for a tree."""

    def add(
        m, transaction, link, p1, p2, added, removed, readtree=None, match=None
    ):
        """Add a revision to storage.

        ``m`` is an object conforming to ``imanifestdict``.

        ``link`` is the linkrev revision number.

        ``p1`` and ``p2`` are the parent revision numbers.

        ``added`` and ``removed`` are iterables of added and removed paths,
        respectively.

        ``readtree`` is a function that can be used to read the child tree(s)
        when recursively writing the full tree structure when using
        treemanifests.

        ``match`` is a matcher that can be used to hint to storage that not
        all paths must be inspected; this is an optimization and can be
        safely ignored. Note that the storage must still be able to reproduce
        a full manifest including files that did not match.
        """

    def storageinfo(
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        """Obtain information about storage for this manifest's data.

        See ``ifilestorage.storageinfo()`` for a description of this method.
        This one behaves the same way, except for manifest data.
        """

    def get_revlog():
        """Return an actual revlog instance, if any.

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving a simple
        way to access the revlog instance helps such code.
        """


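# Hedged sketch: callers wanting revlog-specific fast paths can probe
# ``get_revlog()`` and fall back to the generic interface (the helper name
# and the assumption that a non-revlog backend yields ``None`` are
# illustrative, not guaranteed by the interface):
def _revision_count(store):
    revlog = store.get_revlog()
    if revlog is not None:
        return len(revlog)  # direct revlog access
    return len(store)  # generic ``imanifeststorage`` path

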
class imanifestlog(interfaceutil.Interface):
    """Interface representing a collection of manifest snapshots.

    Represents the root manifest in a repository.

    Also serves as a means to access nested tree manifests and to cache
    tree manifests.
    """

    nodeconstants = interfaceutil.Attribute(
        """nodeconstants used by the current repository."""
    )

    narrowed = interfaceutil.Attribute(
        """True if the manifest is narrowed by a matcher."""
    )

    def __getitem__(node):
        """Obtain a manifest instance for a given binary node.

        Equivalent to calling ``self.get('', node)``.

        The returned object conforms to the ``imanifestrevisionstored``
        interface.
        """

    def get(tree, node, verify=True):
        """Retrieve the manifest instance for a given directory and binary node.

        ``node`` always refers to the node of the root manifest (which will be
        the only manifest if flat manifests are being used).

        If ``tree`` is the empty string, the root manifest is returned.
        Otherwise the manifest for the specified directory will be returned
        (requires tree manifests).

        If ``verify`` is True, ``LookupError`` is raised if the node is not
        known.

        The returned object conforms to the ``imanifestrevisionstored``
        interface.
        """

    def getstorage(tree):
        """Retrieve an interface to storage for a particular tree.

        If ``tree`` is the empty bytestring, storage for the root manifest
        will be returned. Otherwise storage for a tree manifest is returned.

        TODO formalize interface for returned object.
        """

    def clearcaches(clear_persisted_data: bool = False) -> None:
        """Clear caches associated with this collection."""

    def rev(node):
        """Obtain the revision number for a binary node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def update_caches(transaction):
        """Update whatever caches are relevant for the used storage."""


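# Usage sketch for ``imanifestlog`` (assumes ``mfl`` conforms to this
# interface and ``node`` names a root manifest revision; the helper is
# hypothetical):
def _manifest_for_tree(mfl, node, tree=b''):
    # ``tree=b''`` selects the root manifest; any other directory requires
    # tree manifests to be enabled in the repository.
    return mfl.get(tree, node).read()

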
class ilocalrepositoryfilestorage(interfaceutil.Interface):
    """Local repository sub-interface providing access to tracked file storage.

    This interface defines how a repository accesses storage for a single
    tracked file path.
    """

    def file(f):
        """Obtain a filelog for a tracked path.

        The returned type conforms to the ``ifilestorage`` interface.
        """


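# Usage sketch for ``ilocalrepositoryfilestorage`` (assumes ``repo``
# provides this sub-interface; the helper is hypothetical):
def _tracked_file_revisions(repo, path):
    fl = repo.file(path)  # conforms to ``ifilestorage``
    return len(fl)  # number of stored revisions for ``path``

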
class ilocalrepositorymain(interfaceutil.Interface):
    """Main interface for local repositories.

    This currently captures the reality of things - not how things should be.
    """

    nodeconstants = interfaceutil.Attribute(
        """Constant nodes matching the hash function used by the repository."""
    )
    nullid = interfaceutil.Attribute(
        """null revision for the hash function used by the repository."""
    )

    supported = interfaceutil.Attribute(
        """Set of requirements that this repo is capable of opening."""
    )

    requirements = interfaceutil.Attribute(
        """Set of requirements this repo uses."""
    )

    features = interfaceutil.Attribute(
        """Set of "features" this repository supports.

        A "feature" is a loosely-defined term. It can refer to a feature
        in the classical sense or can describe an implementation detail
        of the repository. For example, a ``readonly`` feature may denote
        the repository as read-only. Or a ``revlogfilestore`` feature may
        denote that the repository is using revlogs for file storage.

        The intent of features is to provide a machine-queryable mechanism
        for repo consumers to test for various repository characteristics.

        Features are similar to ``requirements``. The main difference is that
        requirements are stored on-disk and represent requirements to open
        the repository. Features are more run-time capabilities of the
        repository and more granular capabilities (which may be derived from
        requirements).
        """
    )

    filtername = interfaceutil.Attribute(
        """Name of the repoview that is active on this repo."""
    )

    vfs_map = interfaceutil.Attribute(
        """a bytes-key → vfs mapping used by transaction and others"""
    )

    wvfs = interfaceutil.Attribute(
        """VFS used to access the working directory."""
    )

    vfs = interfaceutil.Attribute(
        """VFS rooted at the .hg directory.

        Used to access repository data not in the store.
        """
    )

    svfs = interfaceutil.Attribute(
        """VFS rooted at the store.

        Used to access repository data in the store. Typically .hg/store.
        But can point elsewhere if the store is shared.
        """
    )

    root = interfaceutil.Attribute(
        """Path to the root of the working directory."""
    )

    path = interfaceutil.Attribute("""Path to the .hg directory.""")

    origroot = interfaceutil.Attribute(
        """The filesystem path that was used to construct the repo."""
    )

    auditor = interfaceutil.Attribute(
        """A pathauditor for the working directory.

        This checks if a path refers to a nested repository.

        Operates on the filesystem.
        """
    )

    nofsauditor = interfaceutil.Attribute(
        """A pathauditor for the working directory.

        This is like ``auditor`` except it doesn't do filesystem checks.
        """
    )

    baseui = interfaceutil.Attribute(
        """Original ui instance passed into constructor."""
    )

    ui = interfaceutil.Attribute("""Main ui instance for this instance.""")

    sharedpath = interfaceutil.Attribute(
        """Path to the .hg directory of the repo this repo was shared from."""
    )

    store = interfaceutil.Attribute("""A store instance.""")

    spath = interfaceutil.Attribute("""Path to the store.""")

    sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")

    cachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory.

        Typically .hg/cache.
        """
    )

    wcachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory dedicated to the working
        copy.

        Typically .hg/wcache.
        """
    )

    filteredrevcache = interfaceutil.Attribute(
        """Holds sets of revisions to be filtered."""
    )

    names = interfaceutil.Attribute("""A ``namespaces`` instance.""")

    filecopiesmode = interfaceutil.Attribute(
        """The way file copies should be dealt with in this repo."""
    )

    def close():
        """Close the handle on this repository."""

    def peer(path=None):
        """Obtain an object conforming to the ``peer`` interface."""

    def unfiltered():
        """Obtain an unfiltered/raw view of this repo."""

    def filtered(name, visibilityexceptions=None):
        """Obtain a named view of this repository."""

    obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")

    changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")

    manifestlog = interfaceutil.Attribute(
        """An instance conforming to the ``imanifestlog`` interface.

        Provides access to manifests for the repository.
        """
    )

    dirstate = interfaceutil.Attribute("""Working directory state.""")

    narrowpats = interfaceutil.Attribute(
        """Matcher patterns for this repository's narrowspec."""
    )

    def narrowmatch(match=None, includeexact=False):
        """Obtain a matcher for the narrowspec."""

    def setnarrowpats(newincludes, newexcludes):
        """Define the narrowspec for this repository."""

    def __getitem__(changeid):
        """Try to resolve a changectx."""

    def __contains__(changeid):
        """Whether a changeset exists."""

    def __nonzero__():
        """Always returns True."""
        return True

    __bool__ = __nonzero__

    def __len__():
        """Returns the number of changesets in the repo."""

    def __iter__():
        """Iterate over revisions in the changelog."""

    def revs(expr, *args):
        """Evaluate a revset.

        Emits revisions.
        """

    def set(expr, *args):
        """Evaluate a revset.

        Emits changectx instances.
        """
1759
1759
1760 def anyrevs(specs, user=False, localalias=None):
1760 def anyrevs(specs, user=False, localalias=None):
1761 """Find revisions matching one of the given revsets."""
1761 """Find revisions matching one of the given revsets."""
1762
1762
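    # Illustrative usage sketch (hypothetical; ``repo`` implements this
    # interface): ``revs()`` yields integer revision numbers while ``set()``
    # yields changectx objects for the same revset language, e.g.
    #
    #     for rev in repo.revs(b'ancestors(%d) and not public()', 42):
    #         ...
    #     for ctx in repo.set(b'heads(%ln)', nodes):
    #         ...
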
    def url():
        """Returns a string representing the location of this repo."""

    def hook(name, throw=False, **args):
        """Call a hook."""

    def tags():
        """Return a mapping of tag to node."""

    def tagtype(tagname):
        """Return the type of a given tag."""

    def tagslist():
        """Return a list of tags ordered by revision."""

    def nodetags(node):
        """Return the tags associated with a node."""

    def nodebookmarks(node):
        """Return the list of bookmarks pointing to the specified node."""

    def branchmap():
        """Return a mapping of branch to heads in that branch."""

    def revbranchcache():
        pass

    def register_changeset(rev, changelogrevision):
        """Extension point for caches for new nodes.

        Multiple consumers are expected to need parts of the
        changelogrevision, so it is provided as an optimization to avoid
        duplicate lookups. A simple cache would be fragile when other
        revisions are accessed, too."""
        pass

    def branchtip(branch, ignoremissing=False):
        """Return the tip node for a given branch."""

    def lookup(key):
        """Resolve the node for a revision."""

    def lookupbranch(key):
        """Look up the branch name of the given revision or branch name."""

    def known(nodes):
        """Determine whether a series of nodes is known.

        Returns a list of bools.
        """

    def local():
        """Whether the repository is local."""
        return True

    def publishing():
        """Whether the repository is a publishing repository."""

    def cancopy():
        pass

    def shared():
        """The type of shared repository or None."""

    def wjoin(f, *insidef):
        """Calls self.vfs.reljoin(self.root, f, *insidef)"""

    def setparents(p1, p2):
        """Set the parent nodes of the working directory."""

    def filectx(path, changeid=None, fileid=None):
        """Obtain a filectx for the given file revision."""

    def getcwd():
        """Obtain the current working directory from the dirstate."""

    def pathto(f, cwd=None):
        """Obtain the relative path to a file."""

    def adddatafilter(name, fltr):
        pass

    def wread(filename):
        """Read a file from wvfs, using data filters."""

    def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
        """Write data to a file in the wvfs, using data filters."""

    def wwritedata(filename, data):
        """Resolve data for writing to the wvfs, using data filters."""

    def currenttransaction():
        """Obtain the current transaction instance or None."""

    def transaction(desc, report=None):
        """Open a new transaction to write to the repository."""

    def undofiles():
        """Returns a list of (vfs, path) for files to undo transactions."""

    def recover():
        """Roll back an interrupted transaction."""

    def rollback(dryrun=False, force=False):
        """Undo the last transaction.

        DANGEROUS.
        """

    def updatecaches(tr=None, full=False, caches=None):
        """Warm repo caches."""

    def invalidatecaches():
        """Invalidate cached data due to the repository mutating."""

    def invalidatevolatilesets():
        pass

    def invalidatedirstate():
        """Invalidate the dirstate."""

    def invalidate(clearfilecache=False):
        pass

    def invalidateall():
        pass

    def lock(wait=True):
        """Lock the repository store and return a lock instance."""

    def currentlock():
        """Return the lock if it's held or None."""

    def wlock(wait=True):
        """Lock the non-store parts of the repository."""

    def currentwlock():
        """Return the wlock if it's held or None."""

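    # Illustrative sketch (hypothetical): writers conventionally take the
    # wlock before the store lock and wrap mutations in a transaction, e.g.
    #
    #     with repo.wlock(), repo.lock(), repo.transaction(b'example') as tr:
    #         ...  # mutate the repository
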
    def checkcommitpatterns(wctx, match, status, fail):
        pass

    def commit(
        text=b'',
        user=None,
        date=None,
        match=None,
        force=False,
        editor=False,
        extra=None,
    ):
        """Add a new revision to the repository."""

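    # Illustrative sketch (hypothetical values; ``repo`` implements this
    # interface): committing the working directory returns the new
    # changeset's node, e.g.
    #
    #     node = repo.commit(text=b'fix parser', user=b'alice <a@example.org>')
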
    def commitctx(ctx, error=False, origctx=None):
        """Commit a commitctx instance to the repository."""

    def destroying():
        """Inform the repository that nodes are about to be destroyed."""

    def destroyed():
        """Inform the repository that nodes have been destroyed."""

    def status(
        node1=b'.',
        node2=None,
        match=None,
        ignored=False,
        clean=False,
        unknown=False,
        listsubrepos=False,
    ):
        """Convenience method to call repo[x].status()."""

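    # Illustrative sketch (hypothetical): the returned status object exposes
    # ``modified``, ``added``, ``removed`` (and related) lists of file names,
    # e.g.
    #
    #     st = repo.status()
    #     for f in st.modified:
    #         ...
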
    def addpostdsstatus(ps):
        pass

    def postdsstatus():
        pass

    def clearpostdsstatus():
        pass

    def heads(start=None):
        """Obtain list of nodes that are DAG heads."""

    def branchheads(branch=None, start=None, closed=False):
        pass

    def branches(nodes):
        pass

    def between(pairs):
        pass

    def checkpush(pushop):
        pass

    prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")

    def pushkey(namespace, key, old, new):
        pass

    def listkeys(namespace):
        pass

    def debugwireargs(one, two, three=None, four=None, five=None):
        pass

    def savecommitmessage(text):
        pass

    def register_sidedata_computer(
        kind, category, keys, computer, flags, replace=False
    ):
        pass

    def register_wanted_sidedata(category):
        pass


class completelocalrepository(
    ilocalrepositorymain, ilocalrepositoryfilestorage
):
    """Complete interface for a local repository."""


class iwireprotocolcommandcacher(interfaceutil.Interface):
    """Represents a caching backend for wire protocol commands.

    Wire protocol version 2 supports transparent caching of many commands.
    To leverage this caching, servers can activate objects that cache
    command responses. Objects handle both cache writing and reading.
    This interface defines how that response caching mechanism works.

    Wire protocol version 2 commands emit a series of objects that are
    serialized and sent to the client. The caching layer exists between
    the invocation of the command function and the sending of its output
    objects to an output layer.

    Instances of this interface represent a binding to a cache that
    can serve a response (in place of calling a command function) and/or
    write responses to a cache for subsequent use.

    When a command request arrives, the following happens with regard
    to this interface:

    1. The server determines whether the command request is cacheable.
    2. If it is, an instance of this interface is spawned.
    3. The cacher is activated in a context manager (``__enter__`` is called).
    4. A cache *key* for that request is derived. This will call the
       instance's ``adjustcachekeystate()`` method so the derivation
       can be influenced.
    5. The cacher is informed of the derived cache key via a call to
       ``setcachekey()``.
    6. The cacher's ``lookup()`` method is called to test for presence of
       the derived key in the cache.
    7. If ``lookup()`` returns a hit, that cached result is used in place
       of invoking the command function. ``__exit__`` is called and the
       instance is discarded.
    8. The command function is invoked.
    9. ``onobject()`` is called for each object emitted by the command
       function.
    10. After the final object is seen, ``onfinished()`` is called.
    11. ``__exit__`` is called to signal the end of use of the instance.

    Cache *key* derivation can be influenced by the instance.

    Cache keys are initially derived by a deterministic representation of
    the command request. This includes the command name, arguments, protocol
    version, etc. This initial key derivation is performed by CBOR-encoding a
    data structure and feeding that output into a hasher.

    Instances of this interface can influence this initial key derivation
    via ``adjustcachekeystate()``.

    The instance is informed of the derived cache key via a call to
    ``setcachekey()``. The instance must store the key locally so it can
    be consulted on subsequent operations that may require it.

    When constructed, the instance has access to a callable that can be used
    for encoding response objects. This callable receives as its single
    argument an object emitted by a command function. It returns an iterable
    of bytes chunks representing the encoded object. Unless the cacher is
    caching native Python objects in memory or has a way of reconstructing
    the original Python objects, implementations typically call this function
    to produce bytes from the output objects and then store those bytes in
    the cache. When it comes time to re-emit those bytes, they are wrapped
    in a ``wireprototypes.encodedresponse`` instance to tell the output
    layer that they are pre-encoded.

    When receiving the objects emitted by the command function, instances
    can choose what to do with those objects. The simplest thing to do is
    re-emit the original objects. They will be forwarded to the output
    layer and will be processed as if the cacher did not exist.

    Implementations could also choose to not emit objects - instead locally
    buffering objects or their encoded representation. They could then emit
    a single "coalesced" object when ``onfinished()`` is called. In
    this way, the implementation would function as a filtering layer of
    sorts.

    When caching objects, typically the encoded form of the object will
    be stored. Keep in mind that if the original object is forwarded to
    the output layer, it will need to be encoded there as well. For large
    output, this redundant encoding could add overhead. Implementations
    could wrap the encoded object data in ``wireprototypes.encodedresponse``
    instances to avoid this overhead.
    """

    def __enter__():
        """Marks the instance as active.

        Should return self.
        """

    def __exit__(exctype, excvalue, exctb):
        """Called when cacher is no longer used.

        This can be used by implementations to perform cleanup actions (e.g.
        disconnecting network sockets, aborting a partially cached
        response).
        """

    def adjustcachekeystate(state):
        """Influences cache key derivation by adjusting state to derive key.

        A dict defining the state used to derive the cache key is passed.

        Implementations can modify this dict to record additional state that
        should influence key derivation.

        Implementations are *highly* encouraged to not modify or delete
        existing keys.
        """

    def setcachekey(key):
        """Record the derived cache key for this request.

        Instances may mutate the key for internal usage, as desired. e.g.
        instances may wish to prepend the repo name, introduce path
        components for filesystem or URL addressing, etc. Behavior is up to
        the cache.

        Returns a bool indicating if the request is cacheable by this
        instance.
        """

    def lookup():
        """Attempt to resolve an entry in the cache.

        The instance is instructed to look for the cache key that it was
        informed about via the call to ``setcachekey()``.

        If there's no cache hit or the cacher doesn't wish to use the cached
        entry, ``None`` should be returned.

        Else, a dict defining the cached result should be returned. The
        dict may have the following keys:

        objs
           An iterable of objects that should be sent to the client. That
           iterable of objects is expected to be what the command function
           would return if invoked or an equivalent representation thereof.
        """

    def onobject(obj):
        """Called when a new object is emitted from the command function.

        Receives as its argument the object that was emitted from the
        command function.

        This method returns an iterator of objects to forward to the output
        layer. The easiest implementation is a generator that just
        ``yield obj``.
        """

    def onfinished():
        """Called after all objects have been emitted from the command function.

        Implementations should return an iterator of objects to forward to
        the output layer.

        This method can be a generator.
        """
@@ -1,2798 +1,2798
# manifest.py - manifest revision class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import annotations

import heapq
import itertools
import struct
import typing
import weakref

from typing import (
    ByteString,
    Callable,
    Collection,
    Dict,
    Iterable,
    Iterator,
    List,
    Optional,
    Set,
    Tuple,
    Union,
    cast,
)

from .i18n import _
from .node import (
    bin,
    hex,
    nullrev,
)
from . import (
    encoding,
    error,
    match as matchmod,
    mdiff,
    pathutil,
    policy,
    pycompat,
    revlog,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    constants as revlog_constants,
)

parsers = policy.importmod('parsers')
propertycache = util.propertycache

# Allow tests to more easily test the alternate path in manifestdict.fastdelta()
FASTDELTA_TEXTDIFF_THRESHOLD = 1000


def _parse(nodelen, data: bytes):
    # This method does a little bit of excessive-looking
    # precondition checking. This is so that the behavior of this
    # class exactly matches its C counterpart to try and help
    # prevent surprise breakage for anyone that develops against
    # the pure version.
    if data and data[-1:] != b'\n':
        raise ValueError(b'Manifest did not end in a newline.')
    prev = None
    for l in data.splitlines():
        if prev is not None and prev > l:
            raise ValueError(b'Manifest lines not in sorted order.')
        prev = l
        f, n = l.split(b'\0')
        nl = len(n)
        flags = n[-1:]
        if flags in _manifestflags:
            n = n[:-1]
            nl -= 1
        else:
            flags = b''
        if nl != 2 * nodelen:
            raise ValueError(b'Invalid manifest line')

        yield f, bin(n), flags


def _text(it):
    files = []
    lines = []
    for f, n, fl in it:
        files.append(f)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))

    _checkforbidden(files)
    return b''.join(lines)


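# Illustrative sketch (hypothetical helper, not part of Mercurial): the
# manifest text format consumed by ``_parse`` and produced by ``_text`` is
# one ``<path>\0<hex node>[flag]\n`` line per file, sorted by path.
def _example_manifest_roundtrip():
    data = (
        b'bar.txt\x00' + b'aa' * 20 + b'\n'
        + b'foo.sh\x00' + b'bb' * 20 + b'x\n'
    )
    entries = list(_parse(20, data))
    assert entries == [
        (b'bar.txt', b'\xaa' * 20, b''),
        (b'foo.sh', b'\xbb' * 20, b'x'),
    ]
    # serializing the parsed entries reproduces the original bytes
    assert _text(iter(entries)) == data

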
class lazymanifestiter:
    def __init__(self, lm: '_LazyManifest') -> None:
        self.pos = 0
        self.lm = lm

    def __iter__(self) -> 'lazymanifestiter':
        return self

    def next(self) -> bytes:
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            assert isinstance(data, tuple)
            self.pos += 1
            return data[0]
        assert isinstance(data, bytes)
        self.pos += 1
        zeropos = data.find(b'\x00', pos)
        return data[pos:zeropos]

    __next__ = next


class lazymanifestiterentries:
    def __init__(self, lm: '_LazyManifest') -> None:
        self.lm = lm
        self.pos = 0

    def __iter__(self) -> 'lazymanifestiterentries':
        return self

    def next(self) -> Tuple[bytes, bytes, bytes]:
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            assert isinstance(data, tuple)
            self.pos += 1
            return data
        assert isinstance(data, bytes)
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', pos)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen = nlpos - zeropos - 2
        else:
            hlen = nlpos - zeropos - 1
            flags = b''
        if hlen != 2 * self.lm._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(
            data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
        )
        self.pos += 1
        return (data[pos:zeropos], hashval, flags)

    __next__ = next


def unhexlify(data: bytes, extra: int, pos, length: int):
    s = bin(data[pos : pos + length])
    if extra:
        s += bytes([extra & 0xFF])
    return s


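# Illustrative note (hypothetical values): ``extra`` carries an out-of-band
# trailing hash byte, e.g. unhexlify(b'aa' * 20, 0x07, 0, 40) returns
# b'\xaa' * 20 + b'\x07'.
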
def _cmp(a, b):
    return (a > b) - (a < b)


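# Valid per-file flag bytes: b'' (regular file), b'l' (symlink),
# b'x' (executable), and b't' (tree/directory entry in treemanifests).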
_manifestflags = {b'', b'l', b't', b'x'}


class _LazyManifest:
    """A pure python manifest backed by a byte string. It is supplemented with
    internal lists as it is modified, until it is compacted back to a pure byte
    string.

    ``data`` is the initial manifest data.

    ``positions`` is a list of offsets, one per manifest entry. Positive
    values are offsets into ``data``, negative values are offsets into the
    ``extradata`` list. When an entry is removed, its entry is dropped from
    ``positions``. The values are encoded such that when walking the list and
    indexing into ``data`` or ``extradata`` as appropriate, the entries are
    sorted by filename.

    ``extradata`` is a list of (key, hash, flags) for entries that were added
    or modified since the manifest was created or compacted.
    """

    def __init__(
        self,
        nodelen: int,
        data: bytes,
        positions=None,
        extrainfo=None,
        extradata=None,
        hasremovals: bool = False,
    ):
        self._nodelen = nodelen
        if positions is None:
            self.positions = self.findlines(data)
            self.extrainfo = [0] * len(self.positions)
            self.data = data
            self.extradata = []
            self.hasremovals = False
        else:
            self.positions = positions[:]
            self.extrainfo = extrainfo[:]
            self.extradata = extradata[:]
            self.data = data
            self.hasremovals = hasremovals

    def findlines(self, data: bytes) -> List[int]:
        if not data:
            return []
        pos = data.find(b"\n")
        if pos == -1 or data[-1:] != b'\n':
            raise ValueError(b"Manifest did not end in a newline.")
        positions = [0]
        prev = data[: data.find(b'\x00')]
        while pos < len(data) - 1 and pos != -1:
            positions.append(pos + 1)
            nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
            if nexts < prev:
                raise ValueError(b"Manifest lines not in sorted order.")
            prev = nexts
            pos = data.find(b"\n", pos + 1)
        return positions

    def _get(
        self, index: int
    ) -> Tuple[Union[bytes, Tuple[bytes, bytes, bytes]], int]:
        # get the position encoded in pos:
        # positive number is an index in 'data'
        # negative number is in extrapieces
        pos = self.positions[index]
        if pos >= 0:
            return self.data, pos
        return self.extradata[-pos - 1], -1

    def _getkey(self, pos) -> bytes:
        if pos >= 0:
            return self.data[pos : self.data.find(b'\x00', pos + 1)]
        return self.extradata[-pos - 1][0]

    def bsearch(self, key: bytes) -> int:
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return midpoint
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return -1

    def bsearch2(self, key: bytes) -> Tuple[int, bool]:
        # same as the above, but will always return the position
        # done for performance reasons
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return (midpoint, True)
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return (first, False)

    def __contains__(self, key: bytes) -> bool:
        return self.bsearch(key) != -1

    def __getitem__(self, key: bytes) -> Tuple[bytes, bytes]:
        if not isinstance(key, bytes):
            raise TypeError(b"getitem: manifest keys must be a bytes.")
        needle = self.bsearch(key)
        if needle == -1:
            raise KeyError
        data, pos = self._get(needle)
        if pos == -1:
            assert isinstance(data, tuple)
            return (data[1], data[2])

        assert isinstance(data, bytes)
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', zeropos)
        assert 0 <= needle <= len(self.positions)
        assert len(self.extrainfo) == len(self.positions)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        hlen = nlpos - zeropos - 1
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen -= 1
        else:
            flags = b''
        if hlen != 2 * self._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
        return (hashval, flags)

    def __delitem__(self, key: bytes) -> None:
        needle, found = self.bsearch2(key)
        if not found:
            raise KeyError
        cur = self.positions[needle]
        self.positions = self.positions[:needle] + self.positions[needle + 1 :]
        self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
        if cur >= 0:
            # This does NOT unsort the list as far as the search functions are
            # concerned, as they only examine lines mapped by self.positions.
            self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
            self.hasremovals = True

    def __setitem__(self, key: bytes, value: Tuple[bytes, bytes]):
        if not isinstance(key, bytes):
            raise TypeError(b"setitem: manifest keys must be a byte string.")
        if not isinstance(value, tuple) or len(value) != 2:
            raise TypeError(
                b"Manifest values must be a tuple of (node, flags)."
            )
        hashval = value[0]
        if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
            raise TypeError(b"node must be a 20-byte or 32-byte byte string")
        flags = value[1]
        if not isinstance(flags, bytes) or len(flags) > 1:
            raise TypeError(
                b"flags must be a 0 or 1 byte string, got %a" % (flags,)
            )
        needle, found = self.bsearch2(key)
        if found:
            # put the item
            pos = self.positions[needle]
            if pos < 0:
                self.extradata[-pos - 1] = (key, hashval, value[1])
            else:
                # just don't bother
                self.extradata.append((key, hashval, value[1]))
                self.positions[needle] = -len(self.extradata)
        else:
            # not found, put it in with extra positions
            self.extradata.append((key, hashval, value[1]))
            self.positions = (
                self.positions[:needle]
                + [-len(self.extradata)]
                + self.positions[needle:]
            )
            self.extrainfo = (
                self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
            )

    def copy(self) -> '_LazyManifest':
        # XXX call _compact like in C?
        return _lazymanifest(
            self._nodelen,
            self.data,
            self.positions,
            self.extrainfo,
            self.extradata,
            self.hasremovals,
        )

    def _compact(self) -> None:
        # hopefully not called TOO often
        if len(self.extradata) == 0 and not self.hasremovals:
            return
        l = []
        i = 0
        offset = 0
        self.extrainfo = [0] * len(self.positions)
        while i < len(self.positions):
            if self.positions[i] >= 0:
                cur = self.positions[i]
                last_cut = cur

                # Collect all contiguous entries in the buffer at the current
                # offset, breaking out only for added/modified items held in
                # extradata, or a deleted line prior to the next position.
                while True:
                    self.positions[i] = offset
                    i += 1
                    if i == len(self.positions) or self.positions[i] < 0:
                        break

                    # A removed file has no positions[] entry, but does have an
                    # overwritten first byte. Break out and find the end of the
                    # current good entry/entries if there is a removed file
                    # before the next position.
                    if (
                        self.hasremovals
                        and self.data.find(b'\n\x00', cur, self.positions[i])
                        != -1
                    ):
                        break

                    offset += self.positions[i] - cur
                    cur = self.positions[i]
                end_cut = self.data.find(b'\n', cur)
                if end_cut != -1:
                    end_cut += 1
                offset += end_cut - cur
                l.append(self.data[last_cut:end_cut])
            else:
                while i < len(self.positions) and self.positions[i] < 0:
                    cur = self.positions[i]
                    t = self.extradata[-cur - 1]
                    l.append(self._pack(t))
                    self.positions[i] = offset
                    # Hashes are either 20 bytes (old sha1s) or 32
                    # bytes (new non-sha1).
                    hlen = 20
                    if len(t[1]) > 25:
                        hlen = 32
                    if len(t[1]) > hlen:
                        self.extrainfo[i] = ord(t[1][hlen + 1])
                    offset += len(l[-1])
                    i += 1
        self.data = b''.join(l)
        self.hasremovals = False
        self.extradata = []

    def _pack(self, d: Tuple[bytes, bytes, bytes]) -> bytes:
        n = d[1]
        assert len(n) in (20, 32)
        return d[0] + b'\x00' + hex(n) + d[2] + b'\n'

    def text(self) -> ByteString:
        self._compact()
        return self.data

    def diff(
        self, m2: '_LazyManifest', clean: bool = False
    ) -> Dict[
        bytes,
        Optional[
            Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
        ],
    ]:
        '''Finds changes between the current manifest and m2.'''
        # XXX think whether efficiency matters here
        diff = {}

        for fn, e1, flags in self.iterentries():
            if fn not in m2:
                diff[fn] = (e1, flags), (None, b'')
            else:
                e2 = m2[fn]
                if (e1, flags) != e2:
                    diff[fn] = (e1, flags), e2
                elif clean:
                    diff[fn] = None

        for fn, e2, flags in m2.iterentries():
            if fn not in self:
                diff[fn] = (None, b''), (e2, flags)

        return diff

    def iterentries(self) -> lazymanifestiterentries:
        return lazymanifestiterentries(self)

    def iterkeys(self) -> lazymanifestiter:
        return lazymanifestiter(self)

    def __iter__(self) -> lazymanifestiter:
        return lazymanifestiter(self)

    def __len__(self) -> int:
        return len(self.positions)

    def filtercopy(self, filterfn: Callable[[bytes], bool]) -> '_LazyManifest':
        # XXX should be optimized
        c = _lazymanifest(self._nodelen, b'')
        for f, n, fl in self.iterentries():
            if filterfn(f):
                c[f] = n, fl
        return c


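# Illustrative sketch (hypothetical helper, not part of Mercurial): mutating
# the pure-python lazy manifest keeps lookups working, and ``text()``
# compacts everything back into a single sorted byte string.
def _example_lazymanifest_usage():
    m = _LazyManifest(20, b'a.txt\x00' + b'aa' * 20 + b'\n')
    m[b'b.txt'] = (b'\xbb' * 20, b'x')  # new entry lands in extradata
    assert m[b'b.txt'] == (b'\xbb' * 20, b'x')
    del m[b'a.txt']  # removal blanks out the old line in ``data``
    # _compact() (invoked by text()) rebuilds the byte string
    assert m.text() == b'b.txt\x00' + b'bb' * 20 + b'x\n'

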
try:
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    _lazymanifest = _LazyManifest


class ManifestDict:
    def __init__(self, nodelen: int, data: ByteString = b''):
        self._nodelen = nodelen
        self._lm = _lazymanifest(nodelen, data)

    def __getitem__(self, key: bytes) -> bytes:
        return self._lm[key][0]

    def find(self, key: bytes) -> Tuple[bytes, bytes]:
        return self._lm[key]

    def __len__(self) -> int:
        return len(self._lm)

    def __nonzero__(self) -> bool:
        # nonzero is covered by the __len__ function, but implementing it here
        # makes it easier for extensions to override.
        return len(self._lm) != 0

    __bool__ = __nonzero__

    def set(self, key: bytes, node: bytes, flags: bytes) -> None:
        self._lm[key] = node, flags

    def __setitem__(self, key: bytes, node: bytes) -> None:
        self._lm[key] = node, self.flags(key)

    def __contains__(self, key: bytes) -> bool:
        if key is None:
            return False
        return key in self._lm

    def __delitem__(self, key: bytes) -> None:
        del self._lm[key]

    def __iter__(self) -> Iterator[bytes]:
        return self._lm.__iter__()

    def iterkeys(self) -> Iterator[bytes]:
        return self._lm.iterkeys()

    def keys(self) -> List[bytes]:
        return list(self.iterkeys())

    def filesnotin(self, m2, match=None) -> Set[bytes]:
        '''Set of files in this manifest that are not in the other'''
        if match is not None:
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(m2.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in m2}

    @propertycache
    def _dirs(self) -> pathutil.dirs:
        return pathutil.dirs(self)

    def dirs(self) -> pathutil.dirs:
        return self._dirs

    def hasdir(self, dir: bytes) -> bool:
        return dir in self._dirs

    def _filesfastpath(self, match: matchmod.basematcher) -> bool:
        """Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files."""
        files = match.files()
        return len(files) < 100 and (
            match.isexact()
            or (match.prefix() and all(fn in self for fn in files))
        )

    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
        """Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                if fn in self:
                    yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

615 def _matches(self, match: matchmod.basematcher) -> 'ManifestDict':
615 def _matches(self, match: matchmod.basematcher) -> 'ManifestDict':
616 '''generate a new manifest filtered by the match argument'''
616 '''generate a new manifest filtered by the match argument'''
617 if match.always():
617 if match.always():
618 return self.copy()
618 return self.copy()
619
619
620 if self._filesfastpath(match):
620 if self._filesfastpath(match):
621 m = manifestdict(self._nodelen)
621 m = manifestdict(self._nodelen)
622 lm = self._lm
622 lm = self._lm
623 for fn in match.files():
623 for fn in match.files():
624 if fn in lm:
624 if fn in lm:
625 m._lm[fn] = lm[fn]
625 m._lm[fn] = lm[fn]
626 return m
626 return m
627
627
628 m = manifestdict(self._nodelen)
628 m = manifestdict(self._nodelen)
629 m._lm = self._lm.filtercopy(match)
629 m._lm = self._lm.filtercopy(match)
630 return m
630 return m
631
631
632 def diff(
632 def diff(
633 self,
633 self,
634 m2: 'ManifestDict',
634 m2: 'ManifestDict',
635 match: Optional[matchmod.basematcher] = None,
635 match: Optional[matchmod.basematcher] = None,
636 clean: bool = False,
636 clean: bool = False,
637 ) -> Dict[
637 ) -> Dict[
638 bytes,
638 bytes,
639 Optional[
639 Optional[
640 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
640 Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
641 ],
641 ],
642 ]:
642 ]:
643 """Finds changes between the current manifest and m2.
643 """Finds changes between the current manifest and m2.
644
644
645 Args:
645 Args:
646 m2: the manifest to which this manifest should be compared.
646 m2: the manifest to which this manifest should be compared.
647 clean: if true, include files unchanged between these manifests
647 clean: if true, include files unchanged between these manifests
648 with a None value in the returned dictionary.
648 with a None value in the returned dictionary.
649
649
650 The result is returned as a dict with filename as key and
650 The result is returned as a dict with filename as key and
651 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
651 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
652 nodeid in the current/other manifest and fl1/fl2 is the flag
652 nodeid in the current/other manifest and fl1/fl2 is the flag
653 in the current/other manifest. Where the file does not exist,
653 in the current/other manifest. Where the file does not exist,
654 the nodeid will be None and the flags will be the empty
654 the nodeid will be None and the flags will be the empty
655 string.
655 string.
656 """
656 """
657 if match:
657 if match:
658 m1 = self._matches(match)
658 m1 = self._matches(match)
659 m2 = m2._matches(match)
659 m2 = m2._matches(match)
660 return m1.diff(m2, clean=clean)
660 return m1.diff(m2, clean=clean)
661 return self._lm.diff(m2._lm, clean)
661 return self._lm.diff(m2._lm, clean)
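
    # Illustrative note (added by the editor, not in the original module): a
    # hypothetical diff of {b'a': n1, b'b': n2} against {b'a': n1} with
    # clean=True would look like
    #     {b'b': ((n2, b''), (None, b'')), b'a': None}
    # i.e. b'b' exists only on the left, and b'a' is reported as clean.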

    def setflag(self, key: bytes, flag: bytes) -> None:
        if flag not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._lm[key] = self[key], flag

    def get(self, key: bytes, default=None) -> Optional[bytes]:
        try:
            return self._lm[key][0]
        except KeyError:
            return default

    def flags(self, key: bytes) -> bytes:
        try:
            return self._lm[key][1]
        except KeyError:
            return b''

    def copy(self) -> 'ManifestDict':
        c = manifestdict(self._nodelen)
        c._lm = self._lm.copy()
        return c

    def items(self) -> Iterator[Tuple[bytes, bytes]]:
        return (x[:2] for x in self._lm.iterentries())

    def iteritems(self) -> Iterator[Tuple[bytes, bytes]]:
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self) -> Iterator[Tuple[bytes, bytes, bytes]]:
        return self._lm.iterentries()

    def text(self) -> ByteString:
        # most likely uses native version
        return self._lm.text()

    def fastdelta(
        self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
    ) -> Tuple[ByteString, ByteString]:
        """Given a base manifest text as a bytearray and a list of changes
        relative to that text, compute a delta that can be used by revlog.
        """
        delta = []
        dstart = None
        dend = None
        dline = [b""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # start will either be the index of the item or the insert
                # point
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = b"%s\0%s%s\n" % (f, hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                            _(b"failed to remove %s from manifest") % f
                        )
                    l = b""
                if dstart is not None and dstart <= start and dend >= start:
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    if dstart is not None:
                        delta.append((dstart, dend, b"".join(dline)))
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append((dstart, dend, b"".join(dline)))
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = bytearray(self.text())
            deltatext = mdiff.textdiff(
                util.buffer(base), util.buffer(arraytext)
            )

        return arraytext, deltatext
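
    # Hedged clarification (added): each entry in `changes` pairs a path with
    # a "to delete" flag, e.g. [(b'a', False), (b'b', True)] rewrites the
    # b'a' line from this manifest's current entry and drops the b'b' line
    # from `base`. Note the return order is (arraytext, deltatext).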


manifestdict = interfaceutil.implementer(repository.imanifestdict)(ManifestDict)

if typing.TYPE_CHECKING:
    manifestdict = ManifestDict
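
# Note (added): at runtime `manifestdict` is the interface-decorated wrapper
# registered against repository.imanifestdict; under TYPE_CHECKING it is
# aliased back to the plain class so type checkers resolve its annotations.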


def _msearch(
    m: ByteString, s: bytes, lo: int = 0, hi: Optional[int] = None
) -> Tuple[int, int]:
    """return a tuple (start, end) that says where to find s within m.

    If the string is found, m[start:end] is the line containing
    that string. If start == end the string was not found and
    they indicate the proper sorted insertion point.
    """

    def advance(i: int, c: bytes):
        while i < lenm and m[i : i + 1] != c:
            i += 1
        return i

    if not s:
        return (lo, lo)
    lenm = len(m)
    if not hi:
        hi = lenm
    while lo < hi:
        mid = (lo + hi) // 2
        start = mid
        while start > 0 and m[start - 1 : start] != b'\n':
            start -= 1
        end = advance(start, b'\0')
        if bytes(m[start:end]) < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = advance(end + 40, b'\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = start
    end = advance(lo, b'\0')
    found = m[lo:end]
    if s == found:
        # we know that after the null there are 40 bytes of sha1
        end = advance(end + 40, b'\n')
        return (lo, end + 1)
    else:
        return (lo, lo)
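

# Hedged, illustrative sketch (added; not part of the original module): a tiny
# demonstration of the flat manifest line format that _msearch bisects over.
def _msearch_example():  # pragma: no cover
    # Each entry is b"path\0" + 40 hex digits (+ optional flag) + b"\n".
    text = b'bar\0' + b'1' * 40 + b'\n' + b'foo\0' + b'2' * 40 + b'\n'
    # Found: (start, end) spans the whole b'bar' line (45 bytes).
    assert _msearch(text, b'bar') == (0, 45)
    # Not found: start == end marks the sorted insertion point.
    assert _msearch(text, b'baz') == (45, 45)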


def _checkforbidden(l: Iterable[bytes]) -> None:
    """Check filenames for illegal characters."""
    for f in l:
        if b'\n' in f or b'\r' in f:
            raise error.StorageError(
                _(b"'\\n' and '\\r' disallowed in filenames: %r")
                % pycompat.bytestr(f)
            )


# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(
    addlist: ByteString,
    x: Iterable[Tuple[int, int, bytes]],
) -> Tuple[bytes, ByteString]:
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    currentposition = 0
    newaddlist = bytearray()

    for start, end, content in x:
        newaddlist += addlist[currentposition:start]
        if content:
            newaddlist += bytearray(content)

        currentposition = end

    newaddlist += addlist[currentposition:]

    deltatext = b"".join(
        struct.pack(b">lll", start, end, len(content)) + content
        for start, end, content in x
    )
    return deltatext, newaddlist
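

# Hedged sketch (added; not part of the original module): the delta emitted by
# _addlistdelta is a sequence of b">lll" (start, end, length) headers, each
# followed by the replacement payload for that span of the base text.
def _addlistdelta_example():  # pragma: no cover
    base = bytearray(b'aaa\nccc\n')
    # Insert b'bbb\n' at offset 4 without replacing anything (start == end).
    deltatext, newaddlist = _addlistdelta(base, [(4, 4, b'bbb\n')])
    assert newaddlist == b'aaa\nbbb\nccc\n'
    assert deltatext == struct.pack(b'>lll', 4, 4, 4) + b'bbb\n'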


def _splittopdir(f: bytes) -> Tuple[bytes, bytes]:
    if b'/' in f:
        dir, subpath = f.split(b'/', 1)
        return dir + b'/', subpath
    else:
        return b'', f
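

# Illustration (added): _splittopdir(b'a/b/c') == (b'a/', b'b/c'), while a
# top-level name passes through unchanged: _splittopdir(b'f') == (b'', b'f').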


_noop = lambda s: None


class TreeManifest:
    _dir: bytes
    _dirs: Dict[bytes, 'TreeManifest']
    _dirty: bool
    _files: Dict[bytes, bytes]
    _flags: Dict[bytes, bytes]

    def __init__(self, nodeconstants, dir: bytes = b'', text: bytes = b''):
        self._dir = dir
        self.nodeconstants = nodeconstants
        self._node = self.nodeconstants.nullid
        self._nodelen = self.nodeconstants.nodelen
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        self._lazydirs: Dict[
            bytes,
            Tuple[bytes, Callable[[bytes, bytes], 'TreeManifest'], bool],
        ] = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:

            def readsubtree(subdir, subm):
                raise AssertionError(
                    b'treemanifest constructor only accepts flat manifests'
                )

            self.parse(text, readsubtree)
            self._dirty = True  # Mark flat manifest dirty after parsing

    def _subpath(self, path: bytes) -> bytes:
        return self._dir + path

    def _loadalllazy(self) -> None:
        selfdirs = self._dirs
        subpath = self._subpath
        for d, (node, readsubtree, docopy) in self._lazydirs.items():
            if docopy:
                selfdirs[d] = readsubtree(subpath(d), node).copy()
            else:
                selfdirs[d] = readsubtree(subpath(d), node)
        self._lazydirs.clear()

    def _loadlazy(self, d: bytes) -> None:
        v = self._lazydirs.get(d)
        if v is not None:
            node, readsubtree, docopy = v
            if docopy:
                self._dirs[d] = readsubtree(self._subpath(d), node).copy()
            else:
                self._dirs[d] = readsubtree(self._subpath(d), node)
            del self._lazydirs[d]

    def _loadchildrensetlazy(
        self, visit: Union[Set[bytes], bytes]
    ) -> Optional[Set[bytes]]:
        if not visit:
            return None
        if visit == b'all' or visit == b'this':
            self._loadalllazy()
            return None

        visit = cast(Set[bytes], visit)

        loadlazy = self._loadlazy
        for k in visit:
            loadlazy(k + b'/')
        return visit
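
    # Clarifying note (added): a matcher's visitchildrenset() returns either
    # b'all' / b'this' (everything here must be loaded) or a set of child
    # names without trailing slashes, e.g. {b'foo'}, which loads b'foo/'.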

    def _loaddifflazy(self, t1: 'TreeManifest', t2: 'TreeManifest'):
        """load items in t1 and t2 if they're needed for diffing.

        The current criteria are:
        - if it's not present in _lazydirs in either t1 or t2, load it in the
          other (it may already be loaded or it may not exist, doesn't matter)
        - if it's present in _lazydirs in both, compare the nodeid; if it
          differs, load it in both
        """
        toloadlazy = []
        for d, v1 in t1._lazydirs.items():
            v2 = t2._lazydirs.get(d)
            if v2 is None or v2[0] != v1[0]:
                toloadlazy.append(d)
        for d, v1 in t2._lazydirs.items():
            if d not in t1._lazydirs:
                toloadlazy.append(d)

        for d in toloadlazy:
            t1._loadlazy(d)
            t2._loadlazy(d)

    def __len__(self) -> int:
        self._load()
        size = len(self._files)
        self._loadalllazy()
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def __nonzero__(self) -> bool:
        # Faster than "__len__() != 0" since it avoids loading sub-manifests
        return not self._isempty()

    __bool__ = __nonzero__

    def _isempty(self) -> bool:
        self._load()  # for consistency; already loaded by all callers
        # See if we can skip loading everything.
        if self._files or (
            self._dirs and any(not m._isempty() for m in self._dirs.values())
        ):
            return False
        self._loadalllazy()
        return not self._dirs or all(m._isempty() for m in self._dirs.values())

    @encoding.strmethod
    def __repr__(self) -> bytes:
        return (
            b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
            % (
                self._dir,
                hex(self._node),
                bool(self._loadfunc is _noop),
                self._dirty,
                id(self),
            )
        )

    def dir(self) -> bytes:
        """The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory."""
        return self._dir

    def node(self) -> bytes:
        """The node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read or written from a revlog.
        """
        assert not self._dirty
        return self._node

    def setnode(self, node: bytes) -> None:
        self._node = node
        self._dirty = False

    def iterentries(
        self,
    ) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest'], bytes]]:
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, b'')
            else:
                for x in n.iterentries():
                    yield x

    def items(self) -> Iterator[Tuple[bytes, Union[bytes, 'TreeManifest']]]:
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n
            else:
                for f, sn in n.items():
                    yield f, sn

    iteritems = items

    def iterkeys(self) -> Iterator[bytes]:
        self._load()
        self._loadalllazy()
        for p in sorted(itertools.chain(self._dirs, self._files)):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p]:
                    yield f

    def keys(self) -> List[bytes]:
        return list(self.iterkeys())

    def __iter__(self) -> Iterator[bytes]:
        return self.iterkeys()

    def __contains__(self, f: bytes) -> bool:
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return False

            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f: bytes, default: Optional[bytes] = None) -> Optional[bytes]:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f: bytes) -> bytes:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f: bytes) -> bytes:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return b''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._lazydirs or f in self._dirs:
                return b''
            return self._flags.get(f, b'')

    def find(self, f: bytes) -> Tuple[bytes, bytes]:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, b'')

    def __delitem__(self, f: bytes) -> None:
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def set(self, f: bytes, node: bytes, flags: bytes) -> None:
        """Set both the node and the flags for path f."""
        assert node is not None
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].set(subpath, node, flags)
        else:
            assert len(node) in (20, 32)
            self._files[f] = node
            self._flags[f] = flags
        self._dirty = True

    def __setitem__(self, f: bytes, n: bytes) -> None:
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].__setitem__(subpath, n)
        else:
            # manifest nodes are either 20 bytes or 32 bytes,
            # depending on the hash in use. Assert this as historically
            # sometimes extra bytes were added.
            assert len(n) in (20, 32)
            self._files[f] = n
        self._dirty = True

    def _load(self) -> None:
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f: bytes, flags: bytes) -> None:
        """Set the flags (symlink, executable) for path f."""
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self) -> 'TreeManifest':
        copy = treemanifest(self.nodeconstants, self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:

            def _copyfunc(s):
                self._load()
                s._lazydirs = {
                    d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
                }
                sdirs = s._dirs
                for d, v in self._dirs.items():
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)

            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(
        self, m2: 'TreeManifest', match: Optional[matchmod.basematcher] = None
    ) -> Set[bytes]:
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.filesnotin(m2)

        files = set()

        def _filesnotin(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in t1._dirs.items():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self) -> pathutil.dirs:
        return pathutil.dirs(self)

    def dirs(self) -> pathutil.dirs:
        return self._alldirs

    def hasdir(self, dir: bytes) -> bool:
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            self._loadlazy(topdir)
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        dirslash = dir + b'/'
        return dirslash in self._dirs or dirslash in self._lazydirs

    def walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
        """Generates matching file names.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match: matchmod.basematcher) -> Iterator[bytes]:
        '''Recursively generates matching file names for walk().'''
        visit = match.visitchildrenset(self._dir[:-1])
        if not visit:
            return

        # yield this dir's files and walk its submanifests
        self._load()
        visit = self._loadchildrensetlazy(visit)
        for p in sorted(list(self._dirs) + list(self._files)):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                if not visit or p[:-1] in visit:
                    for f in self._dirs[p]._walk(match):
                        yield f

    def _matches(self, match: matchmod.basematcher) -> 'TreeManifest':
        """recursively generate a new manifest filtered by the match argument."""
        if match.always():
            return self.copy()
        return self._matches_inner(match)

    def _matches_inner(self, match: matchmod.basematcher) -> 'TreeManifest':
        if match.always():
            return self.copy()

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == b'all':
            return self.copy()
        ret = treemanifest(self.nodeconstants, self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != b'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in self._dirs.items():
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches_inner(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def fastdelta(
        self, base: ByteString, changes: Iterable[Tuple[bytes, bool]]
    ) -> ByteString:
        raise FastdeltaUnavailable()

    def diff(
        self,
        m2: 'TreeManifest',
        match: Optional[matchmod.basematcher] = None,
        clean: bool = False,
    ) -> Dict[
        bytes,
        Optional[
            Tuple[Tuple[Optional[bytes], bytes], Tuple[Optional[bytes], bytes]]
        ],
    ]:
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest(self.nodeconstants)

        def _iterativediff(t1, t2, stack):
            """compares two tree manifests and appends the new tree manifests
            that still need to be compared to the stack"""
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in t1._dirs.items():
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in t2._dirs.items():
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in t1._files.items():
                fl1 = t1._flags.get(fn, b'')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, b'')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.items():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, b'')
                    result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result
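
    # Clarifying note (added): the diff is driven by an explicit stack of
    # (subtree1, subtree2) pairs rather than by recursion, so deeply nested
    # trees cannot exhaust Python's recursion limit; result keys are full
    # paths such as b'dir/subdir/file'.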

    def unmodifiedsince(self, m2: 'TreeManifest') -> bool:
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(
        self,
        text: bytes,
        readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
    ) -> None:
        selflazy = self._lazydirs
        for f, n, fl in _parse(self._nodelen, text):
            if fl == b't':
                f = f + b'/'
                # False below means "doesn't need to be copied" and can use the
                # cached value from readsubtree directly.
                selflazy[f] = (n, readsubtree, False)
            elif b'/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl
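
    # Hedged illustration (added): an entry whose flag byte is b't' names a
    # subtree, so parse() records it lazily as b'<dir>/' in _lazydirs; a plain
    # file entry such as b'README' lands directly in _files (and _flags when a
    # flag is present).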
1456
1456
1457 def text(self) -> ByteString:
1457 def text(self) -> ByteString:
1458 """Get the full data of this manifest as a bytestring."""
1458 """Get the full data of this manifest as a bytestring."""
1459 self._load()
1459 self._load()
1460 return _text(self.iterentries())
1460 return _text(self.iterentries())
1461
1461
1462 def dirtext(self) -> ByteString:
1462 def dirtext(self) -> ByteString:
1463 """Get the full data of this directory as a bytestring. Make sure that
1463 """Get the full data of this directory as a bytestring. Make sure that
1464 any submanifests have been written first, so their nodeids are correct.
1464 any submanifests have been written first, so their nodeids are correct.
1465 """
1465 """
1466 self._load()
1466 self._load()
1467 flags = self.flags
1467 flags = self.flags
1468 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1468 lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
1469 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1469 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1470 files = [(f, self._files[f], flags(f)) for f in self._files]
1470 files = [(f, self._files[f], flags(f)) for f in self._files]
1471 return _text(sorted(dirs + files + lazydirs))
1471 return _text(sorted(dirs + files + lazydirs))
1472
1472
1473 def read(
1473 def read(
1474 self,
1474 self,
1475 gettext: Callable[[], ByteString],
1475 gettext: Callable[[], ByteString],
1476 readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
1476 readsubtree: Callable[[bytes, bytes], 'TreeManifest'],
1477 ) -> None:
1477 ) -> None:
1478 def _load_for_read(s):
1478 def _load_for_read(s):
1479 s.parse(gettext(), readsubtree)
1479 s.parse(gettext(), readsubtree)
            s._dirty = False

        self._loadfunc = _load_for_read

    def writesubtrees(
        self,
        m1: 'TreeManifest',
        m2: 'TreeManifest',
        writesubtree: Callable[
            [
                Callable[['TreeManifest'], None],
                bytes,
                bytes,
                matchmod.basematcher,
            ],
            None,
        ],
        match: matchmod.basematcher,
    ) -> None:
        self._load()  # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest(self.nodeconstants)

        def getnode(m, d):
            ld = m._lazydirs.get(d)
            if ld:
                return ld[0]
            tree = m._dirs.get(d, emptytree)
            assert tree is not None  # helps pytype
            return tree._node

        # let's skip investigating things that `match` says we do not need.
        visit = match.visitchildrenset(self._dir[:-1])
        visit = self._loadchildrensetlazy(visit)
        if visit == b'this' or visit == b'all':
            visit = None
        for d, subm in self._dirs.items():
            if visit and d[:-1] not in visit:
                continue
            subp1 = getnode(m1, d)
            subp2 = getnode(m2, d)
            if subp1 == self.nodeconstants.nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2, match)

    def walksubtrees(
        self, matcher: Optional[matchmod.basematcher] = None
    ) -> Iterator['TreeManifest']:
        """Returns an iterator of the subtrees of this manifest, including this
        manifest itself.

        If `matcher` is provided, it only returns subtrees that match.
        """
        if matcher and not matcher.visitdir(self._dir[:-1]):
            return
        if not matcher or matcher(self._dir[:-1]):
            yield self

        self._load()
        # OPT: use visitchildrenset to avoid loading everything.
        self._loadalllazy()
        for d, subm in self._dirs.items():
            for subtree in subm.walksubtrees(matcher=matcher):
                yield subtree


treemanifest = interfaceutil.implementer(repository.imanifestdict)(TreeManifest)

if typing.TYPE_CHECKING:
    treemanifest = TreeManifest


class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    File consists of entries, up to EOF:

    - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    """

    _file = b'manifestfulltextcache'

    def __init__(self, max):
        super(manifestfulltextcache, self).__init__(max)
        self._dirty = False
        self._read = False
        self._opener = None

    def read(self):
        if self._read or self._opener is None:
            return

        try:
            with self._opener(self._file) as fp:
                set = super(manifestfulltextcache, self).__setitem__
                # ignore trailing data, this is a cache, corruption is skipped
                while True:
                    # TODO do we need to do work here for sha1 portability?
                    node = fp.read(20)
                    if len(node) < 20:
                        break
                    try:
                        size = struct.unpack(b'>L', fp.read(4))[0]
                    except struct.error:
                        break
                    value = bytearray(fp.read(size))
                    if len(value) != size:
                        break
                    set(node, value)
        except IOError:
            # the file is allowed to be missing
            pass

        self._read = True
        self._dirty = False

    def write(self):
        if not self._dirty or self._opener is None:
            return
        # rotate backwards to the first used node
        try:
            with self._opener(
                self._file, b'w', atomictemp=True, checkambig=True
            ) as fp:
                node = self._head.prev
                while True:
                    if node.key in self._cache:
                        fp.write(node.key)
                        fp.write(struct.pack(b'>L', len(node.value)))
                        fp.write(node.value)
                    if node is self._head:
                        break
                    node = node.prev
        except IOError:
            # We could not write the cache (e.g. a permission error);
            # the content can be missing.
            #
            # We could try harder and see if we could recreate a wcache
            # directory where we could write to.
            #
            # XXX the error passes silently; having some way to issue an
            # error log via `ui.log` would be nice.
            pass

    def __len__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__len__()

    def __contains__(self, k):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__contains__(k)

    def __iter__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__iter__()

    def __getitem__(self, k):
        if not self._read:
            self.read()
        # the cache lru order can change on read
        setdirty = self._cache.get(k) is not self._head
        value = super(manifestfulltextcache, self).__getitem__(k)
        if setdirty:
            self._dirty = True
        return value

    def __setitem__(self, k, v):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__setitem__(k, v)
        self._dirty = True

    def __delitem__(self, k):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__delitem__(k)
        self._dirty = True

    def get(self, k, default=None):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).get(k, default=default)

    def clear(self, clear_persisted_data=False):
        super(manifestfulltextcache, self).clear()
        if clear_persisted_data:
            self._dirty = True
            self.write()
        self._read = False

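# Illustrative sketch, not part of the module: packing and unpacking one
# cache-file entry in the layout documented on `manifestfulltextcache`
# (20-byte node, big-endian 4-byte length, then the manifest data). The
# helper names are hypothetical; `struct` is already imported by this
# module.
def _example_pack_cache_entry(node: bytes, data: bytes) -> bytes:
    assert len(node) == 20
    return node + struct.pack(b'>L', len(data)) + data


def _example_unpack_cache_entry(buf: bytes) -> tuple:
    node = buf[:20]
    (size,) = struct.unpack(b'>L', buf[20:24])
    return node, buf[24:24 + size]

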
# an upper bound of what we expect from compression
# (the real-life value seems to be "3")
MAXCOMPRESSION = 3


class FastdeltaUnavailable(Exception):
    """Exception raised when fastdelta isn't usable on a manifest."""

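# `FastdeltaUnavailable` is raised as internal control flow in
# `ManifestRevlog.add()` below: it routes a revision to the fulltext
# fallback path when the delta-against-p1 fast path cannot be used.
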
class ManifestRevlog:
    """A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    """

    def __init__(
        self,
        nodeconstants,
        opener,
        tree=b'',
        dirlogcache=None,
        treemanifest=False,
    ):
        """Constructs a new manifest revlog

        `indexfile` - used by extensions to have two manifests at once, like
        when transitioning between flat manifests and treemanifests.

        `treemanifest` - used to indicate this is a tree manifest revlog. Opener
        options can also be used to make this a tree manifest revlog. The opener
        option takes precedence, so if it is set to True, we ignore whatever
        value is passed in to the constructor.
        """
        self.nodeconstants = nodeconstants
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        optiontreemanifest = False
        persistentnodemap = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get(b'manifestcachesize', cachesize)
            optiontreemanifest = opts.get(b'treemanifest', False)
            persistentnodemap = opts.get(b'persistent-nodemap', False)

        self._treeondisk = optiontreemanifest or treemanifest

        self._fulltextcache = manifestfulltextcache(cachesize)

        if tree:
            assert self._treeondisk, (tree, b'opts is %r' % opts)

        radix = b'00manifest'
        if tree:
            radix = b"meta/" + tree + radix

        self.tree = tree

        # The dirlogcache is kept on the root manifest log
        if tree:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {b'': self}

        self._revlog = revlog.revlog(
            opener,
            target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
            radix=radix,
            # only root indexfile is cached
            checkambig=not bool(tree),
            mmaplargeindex=True,
            upperboundcomp=MAXCOMPRESSION,
            persistentnodemap=persistentnodemap,
        )

        self.index = self._revlog.index

    def get_revlog(self):
        """return an actual revlog instance if any

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving a simple
        way to access the revlog instance helps such code.
        """
        return self._revlog

    def _setupmanifestcachehooks(self, repo):
        """Persist the manifestfulltextcache on lock release"""
        if not hasattr(repo, '_wlockref'):
            return

        self._fulltextcache._opener = repo.wcachevfs
        if repo._currentlock(repo._wlockref) is None:
            return

        reporef = weakref.ref(repo)
        manifestrevlogref = weakref.ref(self)

        def persistmanifestcache(success):
            # Repo is in an unknown state, do not persist.
            if not success:
                return

            repo = reporef()
            self = manifestrevlogref()
            if repo is None or self is None:
                return
            if repo.manifestlog.getstorage(b'') is not self:
                # there's a different manifest in play now, abort
                return
            self._fulltextcache.write()

        repo._afterlock(persistmanifestcache)

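    # Note: the hook above keeps only weak references. A strong reference
    # from the after-lock callback to the repo (or to this revlog) would
    # create a cycle and could keep either object alive longer than
    # intended, which is why the callback re-checks both for None.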
    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self, clear_persisted_data: bool = False) -> None:
        self._revlog.clearcaches()
        self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
        self._dirlogcache = {self.tree: self}

    def dirlog(self, d):
        if d:
            assert self._treeondisk
        if d not in self._dirlogcache:
            mfrevlog = manifestrevlog(
                self.nodeconstants,
                self.opener,
                d,
                self._dirlogcache,
                treemanifest=self._treeondisk,
            )
            self._dirlogcache[d] = mfrevlog
        return self._dirlogcache[d]

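    # Illustrative note (paths hypothetical): with trees on disk, each
    # directory gets its own revlog under the `meta/` prefix built from the
    # radix above, e.g. dirlog(b'foo/bar/') would be backed by
    # "meta/foo/bar/00manifest.i", while the root manifest stays in
    # "00manifest.i".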
    def add(
        self,
        m,
        transaction,
        link,
        p1,
        p2,
        added: Iterable[bytes],
        removed: Iterable[bytes],
        readtree=None,
        match=None,
    ):
        """add a manifest entry into the manifest log

        input:

        m: the manifest dict we want to store
        transaction: the open transaction
        p1: manifest-node of p1
        p2: manifest-node of p2
        added: files added/changed compared to the parent
        removed: files removed compared to the parent

        tree manifest input:

        readtree: a function to read a subtree
        match: a filematcher for the subpart of the tree manifest
        """
        try:
            if p1 not in self.fulltextcache:
                raise FastdeltaUnavailable()
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge(
                [(x, False) for x in sorted(added)],
                [(x, True) for x in sorted(removed)],
            )

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self._revlog.rev(p1), deltatext
            text = util.buffer(arraytext)
            rev = self._revlog.addrevision(
                text, transaction, link, p1, p2, cachedelta
            )
            n = self._revlog.node(rev)
        except FastdeltaUnavailable:
            # The first parent manifest isn't already loaded or the
            # manifest implementation doesn't support fastdelta, so
            # we'll just encode a fulltext of the manifest and pass
            # that through to the revlog layer, and let it handle the
            # delta process.
            if self._treeondisk:
                assert readtree, b"readtree must be set for treemanifest writes"
                assert match, b"match must be specified for treemanifest writes"
                m1 = readtree(self.tree, p1)
                m2 = readtree(self.tree, p2)
                n = self._addtree(
                    m, transaction, link, m1, m2, readtree, match=match
                )
                arraytext = None
            else:
                text = m.text()
                rev = self._revlog.addrevision(text, transaction, link, p1, p2)
                n = self._revlog.node(rev)
                arraytext = bytearray(text)

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

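    # Illustrative sketch (hypothetical file names): the `heapq.merge` call
    # above produces one sorted stream of (path, is_removal) pairs, e.g.
    #
    #     >>> import heapq
    #     >>> list(heapq.merge([(b'a.txt', False), (b'c.txt', False)],
    #     ...                  [(b'b.txt', True)]))
    #     [(b'a.txt', False), (b'b.txt', True), (b'c.txt', False)]
    #
    # which is the shape the work list handed to `fastdelta` takes.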
    def _addtree(self, m, transaction, link, m1, m2, readtree, match):
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if self.tree != b'' and (
            m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
        ):
            return m.node()

        def writesubtree(subm, subp1, subp2, match):
            sublog = self.dirlog(subm.dir())
            sublog.add(
                subm,
                transaction,
                link,
                subp1,
                subp2,
                None,
                None,
                readtree=readtree,
                match=match,
            )

        m.writesubtrees(m1, m2, writesubtree, match)
        text = m.dirtext()
        n = None
        if self.tree != b'':
            # Double-check whether contents are unchanged relative to one
            # parent
            if text == m1.dirtext():
                n = m1.node()
            elif text == m2.dirtext():
                n = m2.node()

        if not n:
            rev = self._revlog.addrevision(
                text, transaction, link, m1.node(), m2.node()
            )
            n = self._revlog.node(rev)

        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, value):
        return self._revlog.lookup(value)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def parents(self, node):
        return self._revlog.parents(node)

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def checksize(self):
        return self._revlog.checksize()

    def revision(self, node):
        return self._revlog.revision(node)

    def rawdata(self, node):
        return self._revlog.rawdata(node)

    def revdiff(self, rev1, rev2):
        return self._revlog.revdiff(rev1, rev2)

    def cmp(self, node, text):
        return self._revlog.cmp(node, text)

    def deltaparent(self, rev):
        return self._revlog.deltaparent(rev)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        return self._revlog.addgroup(
            deltas,
            linkmapper,
            transaction,
            alwayscache=alwayscache,
            addrevisioncb=addrevisioncb,
            duplicaterevisioncb=duplicaterevisioncb,
            debug_info=debug_info,
            delta_base_reuse_policy=delta_base_reuse_policy,
        )

    def rawsize(self, rev):
        return self._revlog.rawsize(rev)

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def files(self):
        return self._revlog.files()

    def clone(self, tr, destrevlog, **kwargs):
        if not isinstance(destrevlog, manifestrevlog):
            raise error.ProgrammingError(b'expected manifestrevlog to clone()')

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    @property
    def opener(self):
        return self._revlog.opener

    @opener.setter
    def opener(self, value):
        self._revlog.opener = value


manifestrevlog = interfaceutil.implementer(repository.imanifeststorage)(
    ManifestRevlog
)

if typing.TYPE_CHECKING:
    manifestrevlog = ManifestRevlog

AnyManifestCtx = Union['ManifestCtx', 'TreeManifestCtx']
AnyManifestDict = Union[ManifestDict, TreeManifest]


class ManifestLog:
    """A collection class representing the collection of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""

    def __init__(self, opener, repo, rootstore, narrowmatch):
        self.nodeconstants = repo.nodeconstants
        usetreemanifest = False
        cachesize = 4

        opts = getattr(opener, 'options', None)
        if opts is not None:
            usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
            cachesize = opts.get(b'manifestcachesize', cachesize)

        self._treemanifests = usetreemanifest

        self._rootstore = rootstore
        self._rootstore._setupmanifestcachehooks(repo)
        self._narrowmatch = narrowmatch

        # A cache of the manifestctx or treemanifestctx for each directory
        self._dirmancache = {}
        self._dirmancache[b''] = util.lrucachedict(cachesize)

        self._cachesize = cachesize

    def __getitem__(self, node):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.
        """
        return self.get(b'', node)

    @property
    def narrowed(self):
        return not (self._narrowmatch is None or self._narrowmatch.always())

    def get(
        self, tree: bytes, node: bytes, verify: bool = True
    ) -> AnyManifestCtx:
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.

        `verify` - if True an exception will be thrown if the node is not in
        the revlog
        """
        if node in self._dirmancache.get(tree, ()):
            return self._dirmancache[tree][node]

        if not self._narrowmatch.always():
            if not self._narrowmatch.visitdir(tree[:-1]):
                return excludeddirmanifestctx(self.nodeconstants, tree, node)
        if tree:
            if self._rootstore._treeondisk:
                if verify:
                    # Side-effect: a LookupError is raised if the node
                    # doesn't exist.
                    self.getstorage(tree).rev(node)

                m = treemanifestctx(self, tree, node)
            else:
                raise error.Abort(
                    _(
                        b"cannot ask for manifest directory '%s' in a flat "
                        b"manifest"
                    )
                    % tree
                )
        else:
            if verify:
                # Side-effect: a LookupError is raised if the node doesn't
                # exist.
                self._rootstore.rev(node)

            if self._treemanifests:
                m = treemanifestctx(self, b'', node)
            else:
                m = manifestctx(self, node)

        if node != self.nodeconstants.nullid:
            mancache = self._dirmancache.get(tree)
            if not mancache:
                mancache = util.lrucachedict(self._cachesize)
                self._dirmancache[tree] = mancache
            mancache[node] = m
        return m

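    # Illustrative usage sketch (assumes an open `repo`; names hedged):
    #
    #     mfl = repo.manifestlog
    #     mctx = mfl[some_manifest_node]   # same as mfl.get(b'', node)
    #     m = mctx.read()                  # a manifestdict / treemanifest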
    def getstorage(self, tree):
        return self._rootstore.dirlog(tree)

    def clearcaches(self, clear_persisted_data: bool = False) -> None:
        self._dirmancache.clear()
        self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)

    def rev(self, node) -> int:
        return self._rootstore.rev(node)

    def update_caches(self, transaction) -> None:
        return self._rootstore._revlog.update_caches(transaction=transaction)


manifestlog = interfaceutil.implementer(repository.imanifestlog)(ManifestLog)

if typing.TYPE_CHECKING:
    manifestlog = ManifestLog


class MemManifestCtx:
    _manifestdict: ManifestDict

    def __init__(self, manifestlog):
        self._manifestlog = manifestlog
        self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)

    def _storage(self) -> ManifestRevlog:
        return self._manifestlog.getstorage(b'')

    def copy(self) -> 'MemManifestCtx':
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    def read(self) -> 'ManifestDict':
        return self._manifestdict

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        return self._storage().add(
            self._manifestdict,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            match=match,
        )


memmanifestctx = interfaceutil.implementer(
    repository.imanifestrevisionwritable
)(MemManifestCtx)

if typing.TYPE_CHECKING:
    memmanifestctx = MemManifestCtx


class ManifestCtx:
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """

    _data: Optional[ManifestDict]

    def __init__(self, manifestlog, node):
        self._manifestlog = manifestlog
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self) -> 'ManifestRevlog':
        return self._manifestlog.getstorage(b'')

    def node(self) -> bytes:
        return self._node

    def copy(self) -> MemManifestCtx:
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    @propertycache
    def parents(self) -> Tuple[bytes, bytes]:
        return self._storage().parents(self._node)

    def read(self) -> 'ManifestDict':
        if self._data is None:
            nc = self._manifestlog.nodeconstants
            if self._node == nc.nullid:
                self._data = manifestdict(nc.nodelen)
            else:
                store = self._storage()
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = manifestdict(nc.nodelen, text)
        return self._data

    def readfast(self, shallow: bool = False) -> 'ManifestDict':
        """Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against p1, and therefore can be
        read quickly.

        If `shallow` is True, nothing changes since this is a flat manifest.
        """
        util.nouideprecwarn(
            b'"readfast" is deprecated, use "read_any_fast_delta" or "read_delta_parents"',
            b"6.9",
            stacklevel=2,
        )
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta()
        return self.read()

    def readdelta(self, shallow: bool = False) -> 'ManifestDict':
        """Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        Changing the value of `shallow` has no effect on flat manifests.
        """
        util.nouideprecwarn(
            b'"readdelta" is deprecated, use "read_any_fast_delta" or "read_delta_new_entries"',
            b"6.9",
            stacklevel=2,
        )
        store = self._storage()
        r = store.rev(self._node)
        d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
        return manifestdict(store.nodeconstants.nodelen, d)

    def read_any_fast_delta(
        self,
        valid_bases: Optional[Collection[int]] = None,
        *,
        shallow: bool = False,
    ) -> Tuple[Optional[int], ManifestDict]:
        """see `imanifestrevisionstored` documentation"""
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if valid_bases is None:
            # make sure the next check is True
            valid_bases = (deltaparent,)
        if deltaparent != nullrev and deltaparent in valid_bases:
            d = mdiff.patchtext(store.revdiff(deltaparent, r))
            return (
                deltaparent,
                manifestdict(store.nodeconstants.nodelen, d),
            )
        return (None, self.read())

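    # Note on the `valid_bases is None` default above: substituting
    # `(deltaparent,)` makes the membership check trivially succeed, so an
    # unrestricted caller always gets the stored delta when one exists.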
    def read_delta_parents(
        self,
        *,
        shallow: bool = False,
        exact: bool = True,
    ) -> ManifestDict:
        """see `interface.imanifestrevisionbase` documentation"""
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        parents = [p for p in store.parentrevs(r) if p is not nullrev]
        if not exact and deltaparent in parents:
            d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
            return manifestdict(store.nodeconstants.nodelen, d)
        elif not exact or len(parents) == 0:
            return self.read()
        elif len(parents) == 1:
            p = parents[0]
            d = mdiff.patchtext(store.revdiff(p, r))
            return manifestdict(store.nodeconstants.nodelen, d)
        else:
            p1, p2 = parents
            d1 = mdiff.patchtext(store.revdiff(p1, r))
            d2 = mdiff.patchtext(store.revdiff(p2, r))
            d1 = manifestdict(store.nodeconstants.nodelen, d1)
            d2 = manifestdict(store.nodeconstants.nodelen, d2)
            md = manifestdict(store.nodeconstants.nodelen)
            for f, new_node, new_flag in d1.iterentries():
                if f not in d2:
                    continue
                if new_node is not None:
                    md.set(f, new_node, new_flag)
            return md

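    # Worked note on the two-parent branch above: an entry makes it into
    # `md` only if the file appears in the delta against p1 *and* in the
    # delta against p2, i.e. it changed relative to both parents. A file
    # that changed only against p1 is absent from d2 and gets skipped.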
    def read_delta_new_entries(self, *, shallow=False) -> ManifestDict:
        """see `interface.imanifestrevisionbase` documentation"""
        # If we are using narrow, returning a delta against an arbitrary
        # changeset might return files outside the narrowspec. This can
        # create issues when running validation server-side with strict
        # security, as a push from a low-privilege user might be seen as
        # adding new revisions for files they cannot touch. So we are strict
        # when narrow is involved.
        if self._manifestlog.narrowed:
            return self.read_delta_parents(shallow=shallow, exact=True)
        store = self._storage()
        r = store.rev(self._node)
        d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
        return manifestdict(store.nodeconstants.nodelen, d)

    def find(self, key: bytes) -> Tuple[bytes, bytes]:
        return self.read().find(key)


manifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
    ManifestCtx
)

if typing.TYPE_CHECKING:
    manifestctx = ManifestCtx


class MemTreeManifestCtx:
    _treemanifest: TreeManifest

    def __init__(self, manifestlog, dir=b''):
        self._manifestlog = manifestlog
        self._dir = dir
        self._treemanifest = treemanifest(manifestlog.nodeconstants)

    def _storage(self) -> ManifestRevlog:
        return self._manifestlog.getstorage(b'')

    def copy(self) -> 'MemTreeManifestCtx':
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self._treemanifest.copy()
        return memmf

    def read(self) -> 'TreeManifest':
        return self._treemanifest

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        def readtree(dir, node):
            return self._manifestlog.get(dir, node).read()

        return self._storage().add(
            self._treemanifest,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            readtree=readtree,
            match=match,
        )


memtreemanifestctx = interfaceutil.implementer(
    repository.imanifestrevisionwritable
)(MemTreeManifestCtx)

if typing.TYPE_CHECKING:
    memtreemanifestctx = MemTreeManifestCtx


class TreeManifestCtx:
    _data: Optional[TreeManifest]

    def __init__(self, manifestlog, dir, node):
        self._manifestlog = manifestlog
        self._dir = dir
        self._data = None

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
        # we can instantiate treemanifestctx objects for directories we don't
        # have on disk.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self) -> ManifestRevlog:
        narrowmatch = self._manifestlog._narrowmatch
        if not narrowmatch.always():
            if not narrowmatch.visitdir(self._dir[:-1]):
                return excludedmanifestrevlog(
                    self._manifestlog.nodeconstants, self._dir
                )
        return self._manifestlog.getstorage(self._dir)

    def read(self) -> 'TreeManifest':
        if self._data is None:
            store = self._storage()
            if self._node == self._manifestlog.nodeconstants.nullid:
                self._data = treemanifest(self._manifestlog.nodeconstants)
            # TODO accessing non-public API
            elif store._treeondisk:
                m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)

                def gettext():
                    return store.revision(self._node)

                def readsubtree(dir, subm):
                    # Set verify to False since we need to be able to create
                    # subtrees for trees that don't exist on disk.
                    return self._manifestlog.get(dir, subm, verify=False).read()

                m.read(gettext, readsubtree)
                m.setnode(self._node)
                self._data = m
            else:
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = treemanifest(
                    self._manifestlog.nodeconstants, dir=self._dir, text=text
                )

        return self._data

    def node(self) -> bytes:
        return self._node

    def copy(self) -> 'MemTreeManifestCtx':
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self.read().copy()
        return memmf

    @propertycache
    def parents(self) -> Tuple[bytes, bytes]:
        return self._storage().parents(self._node)

2503 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2503 def readdelta(self, shallow: bool = False) -> AnyManifestDict:
2504 """see `imanifestrevisionstored` documentation"""
2504 """see `imanifestrevisionstored` documentation"""
2505 util.nouideprecwarn(
2505 util.nouideprecwarn(
2506 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2506 b'"readdelta" is deprecated use "read_any_fast_delta" or "read_delta_new_entries"',
2507 b"6.9",
2507 b"6.9",
2508 stacklevel=2,
2508 stacklevel=2,
2509 )
2509 )
2510 store = self._storage()
2510 store = self._storage()
2511 if shallow:
2511 if shallow:
2512 r = store.rev(self._node)
2512 r = store.rev(self._node)
2513 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2513 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2514 return manifestdict(store.nodeconstants.nodelen, d)
2514 return manifestdict(store.nodeconstants.nodelen, d)
2515 else:
2515 else:
2516 # Need to perform a slow delta
2516 # Need to perform a slow delta
2517 r0 = store.deltaparent(store.rev(self._node))
2517 r0 = store.deltaparent(store.rev(self._node))
2518 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2518 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2519 m1 = self.read()
2519 m1 = self.read()
2520 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2520 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2521 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2521 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2522 if n1:
2522 if n1:
2523 md[f] = n1
2523 md[f] = n1
2524 if fl1:
2524 if fl1:
2525 md.setflag(f, fl1)
2525 md.setflag(f, fl1)
2526 return md
2526 return md
2527
2527
2528 def read_any_fast_delta(
2528 def read_any_fast_delta(
2529 self,
2529 self,
2530 valid_bases: Optional[Collection[int]] = None,
2530 valid_bases: Optional[Collection[int]] = None,
2531 *,
2531 *,
2532 shallow: bool = False,
2532 shallow: bool = False,
2533 ) -> Tuple[Optional[int], AnyManifestDict]:
2533 ) -> Tuple[Optional[int], AnyManifestDict]:
2534 """see `imanifestrevisionstored` documentation"""
2534 """see `imanifestrevisionstored` documentation"""
2535 store = self._storage()
2535 store = self._storage()
2536 r = store.rev(self._node)
2536 r = store.rev(self._node)
2537 deltaparent = store.deltaparent(r)
2537 deltaparent = store.deltaparent(r)
2538
2538
2539 if valid_bases is None:
2539 if valid_bases is None:
2540 # make sure the next check is True
2540 # make sure the next check is True
2541 valid_bases = (deltaparent,)
2541 valid_bases = (deltaparent,)
2542 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2542 can_use_delta = deltaparent != nullrev and deltaparent in valid_bases
2543
2543
2544 if shallow:
2544 if shallow:
2545 if can_use_delta:
2545 if can_use_delta:
2546 return (deltaparent, self._read_storage_delta_shallow())
2546 return (deltaparent, self._read_storage_delta_shallow())
2547 else:
2547 else:
2548 d = store.revision(self._node)
2548 d = store.revision(self._node)
2549 return (None, manifestdict(store.nodeconstants.nodelen, d))
2549 return (None, manifestdict(store.nodeconstants.nodelen, d))
2550 else:
2550 else:
2551 # note: This use "slow_delta" here is cargo culted from the previous
2551 # note: This use "slow_delta" here is cargo culted from the previous
2552 # implementation. I am not sure it make sense since the goal here is to
2552 # implementation. I am not sure it make sense since the goal here is to
2553 # be fast, so why are we computing a delta? On the other hand, tree
2553 # be fast, so why are we computing a delta? On the other hand, tree
2554 # manifest delta as fairly "cheap" and allow for skipping whole part of
2554 # manifest delta as fairly "cheap" and allow for skipping whole part of
2555 # the tree that a full read would access. So it might be a good idea.
2555 # the tree that a full read would access. So it might be a good idea.
2556 #
2556 #
2557 # If we realize we don't need delta here, we should simply use:
2557 # If we realize we don't need delta here, we should simply use:
2558 #
2558 #
2559 # return (None, self.read())
2559 # return (None, self.read())
2560 if can_use_delta:
2560 if can_use_delta:
2561 return (None, self._read_storage_slow_delta(base=deltaparent))
2561 return (None, self._read_storage_slow_delta(base=deltaparent))
2562 else:
2562 else:
2563 parents = [
2563 parents = [
2564 p
2564 p
2565 for p in store.parentrevs(r)
2565 for p in store.parentrevs(r)
2566 if p is not nullrev and p in valid_bases
2566 if p is not nullrev and p in valid_bases
2567 ]
2567 ]
2568 if parents:
2568 if parents:
2569 best_base = max(parents)
2569 best_base = max(parents)
2570 else:
2570 else:
2571 best_base = max(valid_bases)
2571 best_base = max(valid_bases)
2572 return (None, self._read_storage_slow_delta(base=best_base))
2572 return (None, self._read_storage_slow_delta(base=best_base))
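
    # Illustrative sketch (hypothetical name `mfctx` and revs 2/3): with
    # valid_bases={2, 3} and a revision whose stored delta base is rev 3,
    # the cheap path applies the stored delta and reports its base;
    # otherwise a delta is recomputed and the base slot is None:
    #
    #   base, md = mfctx.read_any_fast_delta({2, 3}, shallow=True)
    #   # -> (3, manifestdict of the stored delta)   [fast path]
    #   # -> (None, recomputed manifestdict)         [no valid stored base]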

    def _read_storage_delta_shallow(self) -> ManifestDict:
        store = self._storage()
        r = store.rev(self._node)
        d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
        return manifestdict(store.nodeconstants.nodelen, d)

    def _read_storage_slow_delta(self, base) -> 'TreeManifest':
        store = self._storage()
        if base is None:
            base = store.deltaparent(store.rev(self._node))
        m0 = self._manifestlog.get(self._dir, store.node(base)).read()
        m1 = self.read()
        md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
        for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
            if n1:
                md[f] = n1
                if fl1:
                    md.setflag(f, fl1)
        return md

    def read_delta_parents(
        self,
        *,
        shallow: bool = False,
        exact: bool = True,
    ) -> AnyManifestDict:
        """see `interface.imanifestrevisionbase` documentation"""
        store = self._storage()
        r = store.rev(self._node)
        parents = [p for p in store.parentrevs(r) if p is not nullrev]
        if not exact:
            return self.read_any_fast_delta(parents, shallow=shallow)[1]
        elif len(parents) == 0:
            if shallow:
                d = store.revision(self._node)
                return manifestdict(store.nodeconstants.nodelen, d)
            else:
                return self.read()
        elif len(parents) == 1:
            p = parents[0]
            if shallow:
                d = mdiff.patchtext(store.revdiff(p, r))
                return manifestdict(store.nodeconstants.nodelen, d)
            else:
                return self._read_storage_slow_delta(base=p)
        else:
            p1, p2 = parents
            if shallow:
                d1 = mdiff.patchtext(store.revdiff(p1, r))
                d2 = mdiff.patchtext(store.revdiff(p2, r))
                d1 = manifestdict(store.nodeconstants.nodelen, d1)
                d2 = manifestdict(store.nodeconstants.nodelen, d2)
                md = manifestdict(store.nodeconstants.nodelen)
                for f, new_node, new_flag in d1.iterentries():
                    if f not in d2:
                        continue
                    if new_node is not None:
                        md.set(f, new_node, new_flag)
                return md
            else:
                m1 = self._manifestlog.get(self._dir, store.node(p1)).read()
                m2 = self._manifestlog.get(self._dir, store.node(p2)).read()
                mc = self.read()
                d1 = m1.diff(mc)
                d2 = m2.diff(mc)
                md = treemanifest(
                    self._manifestlog.nodeconstants,
                    dir=self._dir,
                )
                for f, new_node, new_flag in d1.iterentries():
                    if f not in d2:
                        continue
                    if new_node is not None:
                        md.set(f, new_node, new_flag)
                return md
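
    # Illustrative sketch (hypothetical `mfctx` for a merge revision): the
    # exact=True path diffs the revision against each parent separately and
    # keeps only the entries that differ from *both* parents:
    #
    #   md = mfctx.read_delta_parents(shallow=True, exact=True)
    #   # md holds the entries new in this revision w.r.t. p1 *and* p2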

    def read_delta_new_entries(
        self, *, shallow: bool = False
    ) -> AnyManifestDict:
        """see `interface.imanifestrevisionbase` documentation"""
        # If we are using narrow, returning a delta against an arbitrary
        # changeset might return files outside the narrowspec. This can
        # create issues when running validation server side with strict
        # security, as a push from a low-privilege user might be seen as
        # adding new revisions for files they cannot touch. So we are strict
        # if narrow is involved.
        if self._manifestlog.narrowed:
            return self.read_delta_parents(shallow=shallow, exact=True)
        # delegate to another existing method for simplicity
        store = self._storage()
        r = store.rev(self._node)
        bases = (store.deltaparent(r),)
        return self.read_any_fast_delta(bases, shallow=shallow)[1]

    def readfast(self, shallow=False) -> AnyManifestDict:
        """Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can
        be read quickly.

        If `shallow` is True, it only returns the entries from this manifest,
        and not any submanifests.
        """
        util.nouideprecwarn(
            b'"readfast" is deprecated, use "read_any_fast_delta" or "read_delta_parents"',
            b"6.9",
            stacklevel=2,
        )
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta(shallow=shallow)

        if shallow:
            return manifestdict(
                store.nodeconstants.nodelen, store.revision(self._node)
            )
        else:
            return self.read()

    def find(self, key: bytes) -> Tuple[bytes, bytes]:
        return self.read().find(key)


treemanifestctx = interfaceutil.implementer(repository.imanifestrevisionstored)(
    TreeManifestCtx
)

if typing.TYPE_CHECKING:
    treemanifestctx = TreeManifestCtx


class excludeddir(treemanifest):
    """Stand-in for a directory that is excluded from the repository.

    With narrowing active on a repository that uses treemanifests,
    some of the directory revlogs will be excluded from the resulting
    clone. This is a huge storage win for clients, but means we need
    some sort of pseudo-manifest to surface to internals so we can
    detect a merge conflict outside the narrowspec. That's what this
    class is: it stands in for a directory whose node is known, but
    whose contents are unknown.
    """

    _files: Dict[bytes, bytes]
    _flags: Dict[bytes, bytes]

    def __init__(self, nodeconstants, dir, node):
        super(excludeddir, self).__init__(nodeconstants, dir)
        self._node = node
        # Add an empty file, which will be included by iterators and such,
        # appearing as the directory itself (i.e. something like "dir/")
        self._files[b''] = node
        self._flags[b''] = b't'

    # Manifests outside the narrowspec should never be modified, so avoid
    # copying. This makes a noticeable difference when there are very many
    # directories outside the narrowspec. Also, it makes sense for the copy
    # to be of the same type as the original, which would not happen with
    # the super type's copy().
    def copy(self):
        return self


class excludeddirmanifestctx(treemanifestctx):
    """context wrapper for excludeddir - see that docstring for rationale"""

    def __init__(self, nodeconstants, dir, node):
        self.nodeconstants = nodeconstants
        self._dir = dir
        self._node = node

    def read(self):
        return excludeddir(self.nodeconstants, self._dir, self._node)

    def readfast(self, shallow=False):
        # special version of readfast since we don't have underlying storage
        return self.read()

    def write(self, *args):
        raise error.ProgrammingError(
            b'attempt to write manifest from excluded dir %s' % self._dir
        )


class excludedmanifestrevlog(manifestrevlog):
    """Stand-in for excluded treemanifest revlogs.

    When narrowing is active on a treemanifest repository, we'll have
    references to directories we can't see due to the revlog being
    skipped. This class exists to conform to the manifestrevlog
    interface for those directories and to proactively prevent writes
    outside the narrowspec.
    """

    def __init__(self, nodeconstants, dir):
        self.nodeconstants = nodeconstants
        self._dir = dir

    def __len__(self):
        raise error.ProgrammingError(
            b'attempt to get length of excluded dir %s' % self._dir
        )

    def rev(self, node):
        raise error.ProgrammingError(
            b'attempt to get rev from excluded dir %s' % self._dir
        )

    def linkrev(self, node):
        raise error.ProgrammingError(
            b'attempt to get linkrev from excluded dir %s' % self._dir
        )

    def node(self, rev):
        raise error.ProgrammingError(
            b'attempt to get node from excluded dir %s' % self._dir
        )

    def add(self, *args, **kwargs):
        # We should never write entries in dirlogs outside the narrow clone.
        # However, the method still gets called from writesubtree() in
        # _addtree(), so we need to handle it. We should possibly make that
        # code avoid calling add() with a clean manifest (_dirty is always
        # False in excludeddir instances).
        pass
@@ -1,4128 +1,4128
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import annotations

import binascii
import collections
import contextlib
import functools
import io
import os
import struct
import typing
import weakref
import zlib

from typing import (
    Iterable,
    Iterator,
    Optional,
    Tuple,
)

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr

# Force pytype to use the non-vendored package
if typing.TYPE_CHECKING:
    # noinspection PyPackageRequirements
    import attr

from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
    vfs as vfsmod,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True)
class RevLogRevisionDelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
    RevLogRevisionDelta
)

if typing.TYPE_CHECKING:
    revlogrevisiondelta = RevLogRevisionDelta


@attr.s(frozen=True)
class RevLogProblem:
    warning = attr.ib(default=None, type=Optional[bytes])
    error = attr.ib(default=None, type=Optional[bytes])
    node = attr.ib(default=None, type=Optional[bytes])


revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
    RevLogProblem
)

if typing.TYPE_CHECKING:
    revlogproblem = RevLogProblem


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache

else:
    parse_index_v1_nodemap = None


def parse_index_v1_rust(data, inline, default_header):
    cache = (0, data) if inline else None
    return rustrevlog.Index(data, default_header), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engine options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commits be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with a large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # The size of the uncompressed cache compared to the largest revision seen.
    uncompressed_cache_factor = attr.ib(default=None)

    # The number of chunks cached
    uncompressed_cache_count = attr.ib(default=None)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of the data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between a delta chain's base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain
    # from compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)
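
# Usage sketch (fields as defined above): the three config objects are plain
# attrs classes handed to the revlog together; copy() returns a detached
# clone, so per-operation tweaks don't leak into shared defaults:
#
#   feature = FeatureConfig(compression_engine=b'zlib')
#   data = DataConfig(chunk_cache_size=131072)
#   delta = DeltaConfig(general_delta=True, sparse_revlog=True)
#   tweaked = delta.copy()
#   tweaked.max_chain_len = 1000  # does not affect `delta`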


class _InnerRevlog:
    """An inner layer of the revlog object

    This layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    opener: vfsmod.vfs

    def __init__(
        self,
        opener: vfsmod.vfs,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        # used during diverted write.
        self._orig_index_file = None

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

        # cache some uncompressed chunks
        # rev → uncompressed_chunk
        #
        # the max cost is dynamically updated to be proportional to the
        # size of the revisions we actually encounter.
        self._uncompressed_chunk_cache = None
        if self.data_config.uncompressed_cache_factor is not None:
            self._uncompressed_chunk_cache = util.lrucachedict(
                self.data_config.uncompressed_cache_count,
                maxcost=65536,  # some arbitrary initial value
            )

        self._delay_buffer = None

    def __len__(self):
        return len(self.index)

    def clear_cache(self):
        assert not self.is_delaying
        self._revisioncache = None
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

    @property
    def canonical_index_file(self):
        if self._orig_index_file is not None:
            return self._orig_index_file
        return self.index_file

    @property
    def is_delaying(self):
        """is the revlog currently delaying the visibility of written data?

        The delaying mechanism can be either in-memory or written on disk in
        a side-file."""
        return (self._delay_buffer is not None) or (
            self._orig_index_file is not None
        )

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)
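
    # Illustrative sketch: the first index field packs the data-file offset
    # and the REVIDX_* flags into a single integer, which is why start()
    # shifts by 16 bits:
    #
    #   offset_flags = self.index[rev][0]
    #   offset = offset_flags >> 16  # byte offset inside the data file
    #   flags = offset_flags & 0xFFFF  # low 16 bits carry revision flags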

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1
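
    # Illustrative sketch (hypothetical rev 5): index[5][3] records the
    # stored base revision. With general delta the base can be any earlier
    # revision; without it, chains are strictly linear:
    #
    #   inner.deltaparent(5)  # general delta: index[5][3], or nullrev
    #                         # when index[5][3] == 5 (a full snapshot)
    #   inner.deltaparent(5)  # linear (non-general) delta: always 4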

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
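
    # Illustrative sketch: a snapshot is a revision whose delta does not
    # build on either parent, e.g. a full text (base == nullrev or
    # base == rev) or an intermediate snapshot in a sparse revlog; a plain
    # delta against p1 or p2 is not a snapshot:
    #
    #   inner.issnapshot(0)  # stored as full text -> True
    #   inner.issnapshot(r)  # r stored as a delta against its p1 -> False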

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
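
    # Usage sketch (hypothetical chain 2 -> 4 -> 7, with rev 2 the base):
    #
    #   chain, stopped = inner._deltachain(7)
    #   # chain == [2, 4, 7], stopped == False
    #   chain, stopped = inner._deltachain(7, stoprev=4)
    #   # chain == [7], stopped == True (the stop revision is excluded)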

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t: bytes):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data: bytes) -> Tuple[bytes, bytes]:
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data
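
    # Illustrative sketch: the returned header is what decompress() routes
    # on. An empty header means the payload is self-describing (engine
    # framed, e.g. zlib output starting with b'x', or raw data starting
    # with b'\0'); b'u' tags data stored uncompressed:
    #
    #   header, payload = inner.compress(b'some revision text')
    #   stored = header + payload  # what ends up in the revlog
    #   # inner.decompress(stored) yields the original bytes back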

    def decompress(self, data: bytes):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and
        # many chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest
        # reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)
672
672
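    # Minimal standalone sketch of the header routing above, using plain zlib
    # instead of Mercurial's compression engines (an assumption for
    # illustration; `decode_chunk` is hypothetical):
    #
    #     import zlib
    #
    #     def decode_chunk(data: bytes) -> bytes:
    #         t = data[0:1]
    #         if t == b'x':      # default zlib streams start with 0x78 ('x')
    #             return zlib.decompress(data)
    #         elif t == b'\0':   # raw data stored verbatim
    #             return data
    #         elif t == b'u':    # explicit "uncompressed" marker
    #             return data[1:]
    #         raise ValueError('unknown compression header %r' % t)
    #
    #     assert decode_chunk(zlib.compress(b'payload')) == b'payload'
    #     assert decode_chunk(b'upayload') == b'payload'
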
    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        elif self._delay_buffer is not None and self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for assert and debug in the python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                    transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

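    # Hedged usage sketch: how callers drive the writing() context. `inner`
    # and `tr` are hypothetical stand-ins for an _InnerRevlog instance and an
    # open transaction; write_entry() is the real entry point used inside it.
    #
    #     with inner.writing(tr):
    #         # all revision additions happen while the handles are open
    #         inner.write_entry(tr, entry, data, link, offset, ...)
    #     # on exit the handles are closed, index last, so readers never see
    #     # index entries that point at unflushed data.
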
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use `_writing` instead.
        """
        try:
            if self._delay_buffer is None:
                f = self.opener(
                    self.index_file,
                    mode=b"r+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                # check_ambig affects the way we open the file for writing.
                # However, here we do not actually open a file for writing, as
                # writes will be appended to a delay_buffer. So check_ambig is
                # not meaningful and is unneeded here.
                f = randomaccessfile.appender(
                    self.opener, self.index_file, b"r+", self._delay_buffer
                )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            if self._delay_buffer is None:
                return self.opener(
                    self.index_file,
                    mode=b"w+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                return randomaccessfile.appender(
                    self.opener, self.index_file, b"w+", self._delay_buffer
                )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from an inline revlog.
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        assert self._delay_buffer is None
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant with revlog-v2, which is never inline, so we never
            # reach this code.

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager.

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant with revlog-v2, which is never inline, so we
                # never reach this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

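    # Worked example (assumption: revlog-v1 inline layout with 64-byte index
    # entries). In an inline revlog, index entries and data chunks are
    # interleaved, so a revision's physical offset must skip the index entries
    # of all revisions up to and including itself:
    #
    #     entry_size = 64
    #     rev = 2
    #     data_offset = 300    # offset within the data stream alone
    #     physical = data_offset + (rev + 1) * entry_size  # 300 + 192 = 492
    #
    # which is exactly the `(startrev + 1) * self.index.entry_size`
    # adjustment performed above.
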
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision.

        Returns a str holding uncompressed data for the requested revision.
        """
        if self._uncompressed_chunk_cache is not None:
            uncomp = self._uncompressed_chunk_cache.get(rev)
            if uncomp is not None:
                return uncomp

        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            uncomp = data
        elif compression_mode == COMP_MODE_DEFAULT:
            uncomp = self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            uncomp = self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
        return uncomp

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        fetched_revs = []
        fadd = fetched_revs.append

        chunks = []
        ladd = chunks.append

        if self._uncompressed_chunk_cache is None:
            fetched_revs = revs
        else:
            for rev in revs:
                cached_value = self._uncompressed_chunk_cache.get(rev)
                if cached_value is None:
                    fadd(rev)
                else:
                    ladd((rev, cached_value))

        if not fetched_revs:
            slicedchunks = ()
        elif not self.data_config.with_sparse_read:
            slicedchunks = (fetched_revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                fetched_revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                for rev in revschunk:
                    ladd((rev, self._chunk(rev)))
                # the per-revision fallback already added this chunk's data;
                # skip the segment-slicing path below.
                continue

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that
            # case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    pass  # raw data, nothing to do
                elif comp_mode == COMP_MODE_INLINE:
                    c = decomp(c)
                elif comp_mode == COMP_MODE_DEFAULT:
                    c = def_decomp(c)
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)
                ladd((rev, c))
                if self._uncompressed_chunk_cache is not None:
                    self._uncompressed_chunk_cache.insert(rev, c, len(c))

        chunks.sort()
        return [x[1] for x in chunks]

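    # Minimal sketch of the batched-read pattern used above, outside of any
    # revlog specifics: fetch one contiguous byte range, then hand out
    # zero-copy views per item instead of issuing one read() per item.
    # `fileobj`, `start`, `length`, and `consume` are hypothetical names.
    #
    #     segment = fileobj.read(end - offset)  # one read for many revisions
    #     for rev in revs:
    #         lo = start(rev) - offset
    #         view = memoryview(segment)[lo : lo + length(rev)]
    #         consume(rev, view)
    #
    # The point is that slicing a buffer is far cheaper than per-revision
    # reads, which is why sparse-read slicing tries to group revisions.
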
    def raw_text(self, node, rev) -> bytes:
        """return the possibly unvalidated rawtext for a revision

        returns rawtext
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        if self._uncompressed_chunk_cache is not None:
            # dynamically update the uncompressed_chunk_cache size to the
            # largest revision we saw in this revlog.
            factor = self.data_config.uncompressed_cache_factor
            candidate_size = rawsize * factor
            if candidate_size > self._uncompressed_chunk_cache.maxcost:
                self._uncompressed_chunk_cache.maxcost = candidate_size

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return rawtext

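    # Hedged sketch of delta-chain reconstruction as performed above, with a
    # toy patch function standing in for mdiff.patches. All names here are
    # hypothetical.
    #
    #     def rebuild(chain_chunks):
    #         base = bytes(chain_chunks[0])    # full text at the chain base
    #         for delta in chain_chunks[1:]:   # apply deltas oldest-first
    #             base = apply_delta(base, delta)
    #         return base
    #
    # The cache short-circuit above works the same way: when _deltachain()
    # stops at a cached revision, the cached rawtext replaces chain_chunks[0]
    # and only the remaining deltas need to be applied.
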
    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.canonical_index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            if self._delay_buffer is None:
                ifh.write(entry)
            else:
                self._delay_buffer.append(entry)
        elif self._delay_buffer is not None:
            msg = b'invalid delayed write on inline revlog'
            raise error.ProgrammingError(msg)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.canonical_index_file, offset)
            assert not sidedata
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )

    def _divert_index(self):
        index_file = self.index_file
        # when we encounter a legacy inline-changelog, split it. However it is
        # important to use the expected filename for pending content
        # (<radix>.a) otherwise hooks won't be seeing the content of the
        # pending transaction.
        if index_file.endswith(b'.s'):
            index_file = self.index_file[:-2]
        return index_file + b'.a'

    def delay(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._delay_buffer is not None or self._orig_index_file is not None:
            # delay or divert already in place
            return None
        elif len(self.index) == 0:
            self._orig_index_file = self.index_file
            self.index_file = self._divert_index()
            assert self._orig_index_file is not None
            assert self.index_file is not None
            if self.opener.exists(self.index_file):
                self.opener.unlink(self.index_file)
            return self.index_file
        else:
            self._delay_buffer = []
            return None

    def write_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        if self._orig_index_file is not None:
            return None, True
        any_pending = False
        pending_index_file = self._divert_index()
        if self.opener.exists(pending_index_file):
            self.opener.unlink(pending_index_file)
        util.copyfile(
            self.opener.join(self.index_file),
            self.opener.join(pending_index_file),
        )
        if self._delay_buffer:
            with self.opener(pending_index_file, b'r+') as ifh:
                ifh.seek(0, os.SEEK_END)
                ifh.write(b"".join(self._delay_buffer))
            any_pending = True
        self._delay_buffer = None
        self._orig_index_file = self.index_file
        self.index_file = pending_index_file
        return self.index_file, any_pending

    def finalize_pending(self):
        assert not self.is_open
        if self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)

        delay = self._delay_buffer is not None
        divert = self._orig_index_file is not None

        if delay and divert:
            assert False, "unreachable"
        elif delay:
            if self._delay_buffer:
                with self.opener(self.index_file, b'r+') as ifh:
                    ifh.seek(0, os.SEEK_END)
                    ifh.write(b"".join(self._delay_buffer))
            self._delay_buffer = None
        elif divert:
            if self.opener.exists(self.index_file):
                self.opener.rename(
                    self.index_file,
                    self._orig_index_file,
                    checkambig=True,
                )
            self.index_file = self._orig_index_file
            self._orig_index_file = None
        else:
            msg = b"neither delay nor divert found on this revlog"
            raise error.ProgrammingError(msg)
        return self.canonical_index_file

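# Hedged lifecycle sketch of the delay/divert machinery above. `inner` and
# the call ordering are illustrative; the three real entry points are
# delay(), write_pending(), and finalize_pending():
#
#     inner.delay()             # empty revlog -> divert writes to <radix>.a
#                               # non-empty    -> buffer entries in memory
#     ...                       # revisions are added during the transaction
#     inner.write_pending()     # expose buffered entries via the .a file so
#                               # hooks can see the pending content
#     inner.finalize_pending()  # on commit: append the buffer, or rename the
#                               # diverted .a file back over the real index
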
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError
    _inner: "_InnerRevlog"

    opener: vfsmod.vfs

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        if len(header_bytes) == 0:
            return True

        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

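    # Worked example (assumption: a revlog-v1 header). The 4-byte big-endian
    # index header packs 16 bits of flags above 16 bits of version, so for
    # header_bytes = b'\x00\x01\x00\x01':
    #
    #     header = 0x00010001
    #     _format_flags   = header & ~0xFFFF  # 0x00010000 -> FLAG_INLINE_DATA
    #     _format_version = header & 0xFFFF   # 0x0001     -> REVLOGV1
    #
    # so is_inline_index() would report this index as inline.
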
    _docket_file: Optional[bytes]

    def __init__(
        self,
        opener: vfsmod.vfs,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
        data_config=None,
        delta_config=None,
        feature_config=None,
        may_inline=True,  # may inline new revlog
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        test, debug, or performance measurement code might not set it to an
        accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self._may_inline = may_inline
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if feature_config is not None:
            self.feature_config = feature_config.copy()
        elif b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if data_config is not None:
            self.data_config = data_config.copy()
        elif b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if delta_config is not None:
            self.delta_config = delta_config.copy()
        elif b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make a copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1
            if self._may_inline:
                new_header |= FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

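    # Note on the power-of-2 check above: for any positive integer n,
    # `n & (n - 1)` clears the lowest set bit, so the result is zero exactly
    # when n has a single bit set, i.e. is a power of two. For example:
    #
    #     n = 65536                   # 0b1_0000_0000_0000_0000
    #     assert n & (n - 1) == 0     # power of two -> accepted
    #     m = 65537
    #     assert m & (m - 1) == 65536 # non-zero -> rejected
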
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content, read with or without mmap

        If the file is missing, return an empty byte string."""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if (
                        file_size >= mmap_threshold
                        and self.opener.is_mmap_safe(filepath)
                    ):
                        if size is not None:
                            # avoid a potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''
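
    # usage sketch (editor's illustration): _loadindex below calls this as
    #   entry_data = self._get_data(entry_point, mmapindexthreshold)
    # and relies on the b'' return value to detect a not-yet-created revlog.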

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-for-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an
        inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this loop.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, by having a way to
        # pre-open the files we feed to the revlog and never close them before
        # we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

    def _loadindex(self, docket=None):
        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
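        # layout note (editor's aside): the 32-bit header packs the format
        # version into its low 16 bits and feature flags into the high bits,
        # e.g. REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA for a typical
        # inline general-delta v1 revlog.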

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self.delta_config.general_delta = features[b'generaldelta'](
            self._format_flags
        )
        self.feature_config.has_side_data = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None and self._nodemap_file is not None:
            # we would like to use the rust index in all cases, especially
            # because it is necessary for AncestorsIterator and LazyAncestors
            # since the 6.7 cycle.
            #
            # However, the performance impact of unconditionally building the
            # nodemap is currently a problem for repositories without a
            # persistent nodemap.
            use_rust_index = True

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = functools.partial(
                parse_index_v1_rust, default_header=new_header
            )
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog APIs"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self) -> Iterator[int]:
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update on-disk caches

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self, clear_persisted_data: bool = False) -> None:
        """Clear in-memory caches"""
        self._chainbasecache.clear()
        self._inner.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)
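        # note (editor's aside): ``clear_persisted_data`` is not consulted by
        # this implementation; the body only drops in-memory state and
        # re-reads the persisted nodemap docket, if one exists.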

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank
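
    # example (editor's aside): in a purely linear history, revision r has
    # rank r + 1 (itself plus its r ancestors); for a merge, the rank is the
    # size of the union of both parents' ancestor sets, plus one.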

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)
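
    # note (editor's aside): the compressed data for ``rev`` thus occupies
    # the half-open byte range [start(rev), end(rev)) of the data file (or of
    # the index file, for inline revlogs).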

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
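
    # illustration (editor's sketch): for a delta chain base -> ... -> rev,
    # _chaininfo(rev) returns (number of deltas to apply, total compressed
    # bytes to read), and chainlen(rev) is just its first element.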

    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
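
    # usage sketch (editor's illustration, ``rl`` is any revlog instance):
    # iterate every ancestor of the tip, newest first, without materializing
    # the full set:
    #   for r in rl.ancestors([rl.tiprev()], inclusive=True):
    #       ...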

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)
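
        # (editor's aside) ``lazyset`` lets the lazily generated ancestor
        # stream be treated as a set: membership tests consult both the
        # explicitly added revs and the generator-backed values, without
        # forcing the whole ancestor set into memory up front.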

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses a list of all of the revlog's heads."""
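        # semantics sketch (editor's illustration): in a linear history
        # A -> B -> C -> D, nodesbetween(roots=[B], heads=[D]) returns
        # ([B, C, D], [B], [D]).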
2244 nonodes = ([], [], [])
2244 nonodes = ([], [], [])
2245 if roots is not None:
2245 if roots is not None:
2246 roots = list(roots)
2246 roots = list(roots)
2247 if not roots:
2247 if not roots:
2248 return nonodes
2248 return nonodes
2249 lowestrev = min([self.rev(n) for n in roots])
2249 lowestrev = min([self.rev(n) for n in roots])
2250 else:
2250 else:
2251 roots = [self.nullid] # Everybody's a descendant of nullid
2251 roots = [self.nullid] # Everybody's a descendant of nullid
2252 lowestrev = nullrev
2252 lowestrev = nullrev
2253 if (lowestrev == nullrev) and (heads is None):
2253 if (lowestrev == nullrev) and (heads is None):
2254 # We want _all_ the nodes!
2254 # We want _all_ the nodes!
2255 return (
2255 return (
2256 [self.node(r) for r in self],
2256 [self.node(r) for r in self],
2257 [self.nullid],
2257 [self.nullid],
2258 list(self.heads()),
2258 list(self.heads()),
2259 )
2259 )
2260 if heads is None:
2260 if heads is None:
2261 # All nodes are ancestors, so the latest ancestor is the last
2261 # All nodes are ancestors, so the latest ancestor is the last
2262 # node.
2262 # node.
2263 highestrev = len(self) - 1
2263 highestrev = len(self) - 1
2264 # Set ancestors to None to signal that every node is an ancestor.
2264 # Set ancestors to None to signal that every node is an ancestor.
2265 ancestors = None
2265 ancestors = None
2266 # Set heads to an empty dictionary for later discovery of heads
2266 # Set heads to an empty dictionary for later discovery of heads
2267 heads = {}
2267 heads = {}
2268 else:
2268 else:
2269 heads = list(heads)
2269 heads = list(heads)
2270 if not heads:
2270 if not heads:
2271 return nonodes
2271 return nonodes
2272 ancestors = set()
2272 ancestors = set()
2273 # Turn heads into a dictionary so we can remove 'fake' heads.
2273 # Turn heads into a dictionary so we can remove 'fake' heads.
2274 # Also, later we will be using it to filter out the heads we can't
2274 # Also, later we will be using it to filter out the heads we can't
2275 # find from roots.
2275 # find from roots.
2276 heads = dict.fromkeys(heads, False)
2276 heads = dict.fromkeys(heads, False)
2277 # Start at the top and keep marking parents until we're done.
2277 # Start at the top and keep marking parents until we're done.
2278 nodestotag = set(heads)
2278 nodestotag = set(heads)
2279 # Remember where the top was so we can use it as a limit later.
2279 # Remember where the top was so we can use it as a limit later.
2280 highestrev = max([self.rev(n) for n in nodestotag])
2280 highestrev = max([self.rev(n) for n in nodestotag])
2281 while nodestotag:
2281 while nodestotag:
2282 # grab a node to tag
2282 # grab a node to tag
2283 n = nodestotag.pop()
2283 n = nodestotag.pop()
2284 # Never tag nullid
2284 # Never tag nullid
2285 if n == self.nullid:
2285 if n == self.nullid:
2286 continue
2286 continue
2287 # A node's revision number represents its place in a
2287 # A node's revision number represents its place in a
2288 # topologically sorted list of nodes.
2288 # topologically sorted list of nodes.
2289 r = self.rev(n)
2289 r = self.rev(n)
2290 if r >= lowestrev:
2290 if r >= lowestrev:
2291 if n not in ancestors:
2291 if n not in ancestors:
2292 # If we are possibly a descendant of one of the roots
2292 # If we are possibly a descendant of one of the roots
2293 # and we haven't already been marked as an ancestor
2293 # and we haven't already been marked as an ancestor
2294 ancestors.add(n) # Mark as ancestor
2294 ancestors.add(n) # Mark as ancestor
2295 # Add non-nullid parents to list of nodes to tag.
2295 # Add non-nullid parents to list of nodes to tag.
2296 nodestotag.update(
2296 nodestotag.update(
2297 [p for p in self.parents(n) if p != self.nullid]
2297 [p for p in self.parents(n) if p != self.nullid]
2298 )
2298 )
2299 elif n in heads: # We've seen it before, is it a fake head?
2299 elif n in heads: # We've seen it before, is it a fake head?
2300 # So it is, real heads should not be the ancestors of
2300 # So it is, real heads should not be the ancestors of
2301 # any other heads.
2301 # any other heads.
2302 heads.pop(n)
2302 heads.pop(n)
2303 if not ancestors:
2303 if not ancestors:
2304 return nonodes
2304 return nonodes
2305 # Now that we have our set of ancestors, we want to remove any
2305 # Now that we have our set of ancestors, we want to remove any
2306 # roots that are not ancestors.
2306 # roots that are not ancestors.
2307
2307
2308 # If one of the roots was nullid, everything is included anyway.
2308 # If one of the roots was nullid, everything is included anyway.
2309 if lowestrev > nullrev:
2309 if lowestrev > nullrev:
2310 # But, since we weren't, let's recompute the lowest rev to not
2310 # But, since we weren't, let's recompute the lowest rev to not
2311 # include roots that aren't ancestors.
2311 # include roots that aren't ancestors.
2312
2312
2313 # Filter out roots that aren't ancestors of heads
2313 # Filter out roots that aren't ancestors of heads
2314 roots = [root for root in roots if root in ancestors]
2314 roots = [root for root in roots if root in ancestors]
2315 # Recompute the lowest revision
2315 # Recompute the lowest revision
2316 if roots:
2316 if roots:
2317 lowestrev = min([self.rev(root) for root in roots])
2317 lowestrev = min([self.rev(root) for root in roots])
2318 else:
2318 else:
2319 # No more roots? Return empty list
2319 # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached.
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def headrevsdiff(self, start, stop):
        try:
            return self.index.headrevsdiff(start, stop)
        except AttributeError:
            return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

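    # Note on `_headrevs` above: `ishead` has one extra slot because parents
    # can be `nullrev` (-1); those writes land harmlessly in `ishead[-1]`
    # instead of clobbering a real revision. A hedged sketch on hypothetical
    # data for a linear history 0 <- 1 <- 2:
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1)}
    #   ishead = [0] * 4                        # 3 revs + sentinel slot
    #   for r in (0, 1, 2):
    #       ishead[r] = 1
    #       p1, p2 = parents[r]
    #       ishead[p1] = ishead[p2] = 0
    #   assert [r for r, v in enumerate(ishead) if v] == [2]
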
    def _head_node_ids(self):
        try:
            return self.index.head_node_ids()
        except AttributeError:
            return [self.node(r) for r in self.headrevs()]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned.
        if stop is specified, all the revs in stop are treated
        as if they had no children.
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return self._head_node_ids()
        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

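    # Usage sketch for `heads()` (hedged; `rl` and `some_node` are
    # hypothetical): with no arguments it returns every childless node,
    # while `start` restricts the result to descendants of a given node.
    #
    #   tips = rl.heads()
    #   assert all(rl.children(n) == [] for n in tips)
    #   branch_tips = rl.heads(start=some_node)
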
    def diffheads(self, start, stop):
        """return the nodes that make up the difference between
        heads of revs before `start` and heads of revs before `stop`"""
        removed, added = self.headrevsdiff(start, stop)
        return [self.node(r) for r in removed], [self.node(r) for r in added]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

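    # Semantics sketch for the ancestry helpers above (hedged; `rl` and the
    # revision numbers are hypothetical): `isancestorrev` is reflexive, and
    # `a > b` is rejected immediately because a parent always has a smaller
    # revision number than its children.
    #
    #   rl.isancestorrev(3, 3)   # True: a rev is its own ancestor
    #   rl.isancestorrev(5, 3)   # False without any DAG walk: 5 > 3
    #   rl.isancestorrev(-1, 3)  # True: nullrev precedes everything
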
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quickly search the index
                return node
            except error.LookupError:
                pass  # may be a partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

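    # Note on the wdir handling above (hedged sketch; `rl` and the prefixes
    # are hypothetical): the working-directory pseudo-node is all-'f' in hex,
    # so an all-'f' prefix may name it instead of a stored node.
    #
    #   rl._partialmatch(b'a1b2')  # node, None, or
    #   #   AmbiguousPrefixLookupError if several nodes share the prefix
    #   rl._partialmatch(b'ffff')  # WdirUnsupported if only the virtual
    #   #   working-directory node can match the prefix
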
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

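    # Resolution-order sketch for `lookup()` (hedged; `rl` and the values
    # are hypothetical): exact identifiers are tried by `_match` before any
    # prefix matching happens.
    #
    #   rl.lookup(7)              # revision number
    #   rl.lookup(b'7')           # str(revision number)
    #   rl.lookup(node20)         # 20-byte binary nodeid
    #   rl.lookup(hexnode40)      # full 40-digit hex nodeid
    #   rl.lookup(b'a1b2c3')      # unique hex prefix, else LookupError
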
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

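    # Usage sketch for `shortest()` (hedged; `rl` and `node` hypothetical):
    # the result is the shortest hex prefix that unambiguously names `node`,
    # never shorter than `minlength`.
    #
    #   pfx = rl.shortest(node, minlength=4)
    #   assert len(pfx) >= 4
    #   assert rl.lookup(pfx) == node
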
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

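    # Relationship sketch for the delta-chain helpers above (hedged; `rl`
    # and `rev` are hypothetical): a snapshot's depth counts the snapshots
    # before it in its own delta chain, so a full-text snapshot sits at
    # depth 0.
    #
    #   if rl.issnapshot(rev):
    #       depth = rl.snapshotdepth(rev)       # 0 for a full snapshot
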
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        text = self._inner.raw_text(node, rev)
        return (rev, text, False)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

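    # Caching note for `_revisiondata` above (a hedged observation, not new
    # behavior): the revision cache stores the *raw* text, so for revisions
    # carrying flags the flag processors run again on every access; only the
    # disk read and decompression are amortized by the cache.
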
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        if self._sidedatafile is None:
            return {}
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

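    # Contrast sketch for `revision()` vs `rawdata()` (hedged; `rl` and
    # `rev` are hypothetical): `rawdata` returns the bytes as kept in the
    # revlog, `revision` returns them after the read-side flag processors
    # ran. The two only differ for revisions with transforming flags set.
    #
    #   stored = rl.rawdata(rev)
    #   usable = rl.revision(rev)
    #   # with no flags set on `rev`, stored == usable
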
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

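    # Hash-layout sketch for `hash()` above (hedged; this mirrors what
    # `storageutil.hashrevisionsha1` is expected to compute): the SHA-1
    # covers the two parent nodes in sorted order followed by the text, so
    # swapping p1 and p2 yields the same node.
    #
    #   import hashlib
    #   lo, hi = sorted([p1, p2])               # hypothetical 20-byte nodes
    #   expected = hashlib.sha1(lo + hi + text).digest()
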
    @property
    def _split_index_file(self):
        """the path where the index of an ongoing splitting operation is expected

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

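    # Path sketch for `_split_index_file` (hedged; the radixes are
    # hypothetical): a filelog with radix b'data/foo' gets its split index at
    # b'data-s/foo.i', outside the tracked-name namespace, while a root-level
    # revlog with radix b'00changelog' uses b'00changelog.i.s'.
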
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or (self._may_inline and total_size < _maxinline):
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        # In the common case, we enforce inline size because the revlog has
        # been appended to. In such a case, it must have an initial offset
        # recorded in the transaction.
        troffset = tr.findoffset(self._inner.canonical_index_file)
        pre_touched = troffset is not None
        if not pre_touched and self.target[0] != KIND_CHANGELOG:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )

        tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        old_index_file_path = self._indexfile
        new_index_file_path = self._split_index_file
        opener = self.opener
        weak_self = weakref.ref(self)

        # the "split" index replaces the real index when the transaction is
        # finalized
        def finalize_callback(tr):
            opener.rename(
                new_index_file_path,
                old_index_file_path,
                checkambig=True,
            )
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.index_file = maybe_self._indexfile

        def abort_callback(tr):
            maybe_self = weak_self()
            if maybe_self is not None:
                maybe_self._indexfile = old_index_file_path
                maybe_self._inner.inline = True
                maybe_self._inner.index_file = old_index_file_path

        tr.registertmp(new_index_file_path)
        # we use 001 here to make sure this happens after the finalization of
        # the pending changelog write (which uses 000). Otherwise the two
        # finalizers would step over each other and delete the changelog.i
        # file.
        if self.target[1] is not None:
            callback_id = b'001-revlog-split-%d-%s' % self.target
        else:
            callback_id = b'001-revlog-split-%d' % self.target[0]
        tr.addfinalize(callback_id, finalize_callback)
        tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

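    # Usage sketch for the writing context (hedged; `rl`, `tr` and the loop
    # variables are hypothetical): batching additions inside one `_writing`
    # block keeps the file handles open and writes the docket once at exit.
    #
    #   with rl._writing(tr):
    #       for text, p1, p2 in pending_revisions:
    #           rl.addrevision(text, tr, linkrev, p1, p2)
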
    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        sidedata - an optional mapping of sidedata to store with the revision
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

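    # Call sketch for `addrevision()` (hedged; the names are hypothetical):
    # in the common case the revlog hashes the text itself, and re-adding
    # identical content with the same parents returns the existing revision
    # number instead of storing a duplicate.
    #
    #   rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #   assert rl.addrevision(text, tr, linkrev, p1node, p2node) == rev
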
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data: bytes) -> Tuple[bytes, bytes]:
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

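    # Format sketch for `compress()`/`decompress()` (hedged; `rl` and
    # `rawtext` are hypothetical): `compress` returns a (header, data) pair
    # whose concatenation is what gets stored; a b'u' header marks data kept
    # uncompressed, while an empty header means the engine's own framing
    # identifies the payload.
    #
    #   h, data = rl.compress(rawtext)
    #   assert rl.decompress(h + data) == rawtext
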
3127 def _addrevision(
3127 def _addrevision(
3128 self,
3128 self,
3129 node,
3129 node,
3130 rawtext,
3130 rawtext,
3131 transaction,
3131 transaction,
3132 link,
3132 link,
3133 p1,
3133 p1,
3134 p2,
3134 p2,
3135 flags,
3135 flags,
3136 cachedelta,
3136 cachedelta,
3137 alwayscache=False,
3137 alwayscache=False,
3138 deltacomputer=None,
3138 deltacomputer=None,
3139 sidedata=None,
3139 sidedata=None,
3140 ):
3140 ):
3141 """internal function to add revisions to the log
3141 """internal function to add revisions to the log
3142
3142
3143 see addrevision for argument descriptions.
3143 see addrevision for argument descriptions.
3144
3144
3145 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3145 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3146
3146
3147 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3147 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3148 be used.
3148 be used.
3149
3149
3150 invariants:
3150 invariants:
3151 - rawtext is optional (can be None); if not set, cachedelta must be set.
3151 - rawtext is optional (can be None); if not set, cachedelta must be set.
3152 if both are set, they must correspond to each other.
3152 if both are set, they must correspond to each other.
3153 """
3153 """
3154 if node == self.nullid:
3154 if node == self.nullid:
3155 raise error.RevlogError(
3155 raise error.RevlogError(
3156 _(b"%s: attempt to add null revision") % self.display_id
3156 _(b"%s: attempt to add null revision") % self.display_id
3157 )
3157 )
3158 if (
3158 if (
3159 node == self.nodeconstants.wdirid
3159 node == self.nodeconstants.wdirid
3160 or node in self.nodeconstants.wdirfilenodeids
3160 or node in self.nodeconstants.wdirfilenodeids
3161 ):
3161 ):
3162 raise error.RevlogError(
3162 raise error.RevlogError(
3163 _(b"%s: attempt to add wdir revision") % self.display_id
3163 _(b"%s: attempt to add wdir revision") % self.display_id
3164 )
3164 )
3165 if not self._inner.is_writing:
3165 if not self._inner.is_writing:
3166 msg = b'adding revision outside `revlog._writing` context'
3166 msg = b'adding revision outside `revlog._writing` context'
3167 raise error.ProgrammingError(msg)
3167 raise error.ProgrammingError(msg)
3168
3168
3169 btext = [rawtext]
3169 btext = [rawtext]
3170
3170
3171 curr = len(self)
3171 curr = len(self)
3172 prev = curr - 1
3172 prev = curr - 1
3173
3173
3174 offset = self._get_data_offset(prev)
3174 offset = self._get_data_offset(prev)
3175
3175
3176 if self._concurrencychecker:
3176 if self._concurrencychecker:
3177 ifh, dfh, sdfh = self._inner._writinghandles
3177 ifh, dfh, sdfh = self._inner._writinghandles
3178 # XXX no checking for the sidedata file
3178 # XXX no checking for the sidedata file
3179 if self._inline:
3179 if self._inline:
3180 # offset is "as if" it were in the .d file, so we need to add on
3180 # offset is "as if" it were in the .d file, so we need to add on
3181 # the size of the entry metadata.
3181 # the size of the entry metadata.
3182 self._concurrencychecker(
3182 self._concurrencychecker(
3183 ifh, self._indexfile, offset + curr * self.index.entry_size
3183 ifh, self._indexfile, offset + curr * self.index.entry_size
3184 )
3184 )
3185 else:
3185 else:
3186 # Entries in the .i are a consistent size.
3186 # Entries in the .i are a consistent size.
3187 self._concurrencychecker(
3187 self._concurrencychecker(
3188 ifh, self._indexfile, curr * self.index.entry_size
3188 ifh, self._indexfile, curr * self.index.entry_size
3189 )
3189 )
3190 self._concurrencychecker(dfh, self._datafile, offset)
3190 self._concurrencychecker(dfh, self._datafile, offset)
3191
3191
3192 p1r, p2r = self.rev(p1), self.rev(p2)
3192 p1r, p2r = self.rev(p1), self.rev(p2)
3193
3193
3194 # full versions are inserted when the needed deltas
3194 # full versions are inserted when the needed deltas
3195 # become comparable to the uncompressed text
3195 # become comparable to the uncompressed text
3196 if rawtext is None:
3196 if rawtext is None:
3197 # need rawtext size, before changed by flag processors, which is
3197 # need rawtext size, before changed by flag processors, which is
3198 # the non-raw size. use revlog explicitly to avoid filelog's extra
3198 # the non-raw size. use revlog explicitly to avoid filelog's extra
3199 # logic that might remove metadata size.
3199 # logic that might remove metadata size.
3200 textlen = mdiff.patchedsize(
3200 textlen = mdiff.patchedsize(
3201 revlog.size(self, cachedelta[0]), cachedelta[1]
3201 revlog.size(self, cachedelta[0]), cachedelta[1]
3202 )
3202 )
3203 else:
3203 else:
3204 textlen = len(rawtext)
3204 textlen = len(rawtext)
3205
3205
3206 if deltacomputer is None:
3206 if deltacomputer is None:
3207 write_debug = None
3207 write_debug = None
3208 if self.delta_config.debug_delta:
3208 if self.delta_config.debug_delta:
3209 write_debug = transaction._report
3209 write_debug = transaction._report
3210 deltacomputer = deltautil.deltacomputer(
3210 deltacomputer = deltautil.deltacomputer(
3211 self, write_debug=write_debug
3211 self, write_debug=write_debug
3212 )
3212 )
3213
3213
3214 if cachedelta is not None and len(cachedelta) == 2:
3214 if cachedelta is not None and len(cachedelta) == 2:
3215 # If the cached delta has no information about how it should be
3215 # If the cached delta has no information about how it should be
3216 # reused, add the default reuse instruction according to the
3216 # reused, add the default reuse instruction according to the
3217 # revlog's configuration.
3217 # revlog's configuration.
3218 if (
3218 if (
3219 self.delta_config.general_delta
3219 self.delta_config.general_delta
3220 and self.delta_config.lazy_delta_base
3220 and self.delta_config.lazy_delta_base
3221 ):
3221 ):
3222 delta_base_reuse = DELTA_BASE_REUSE_TRY
3222 delta_base_reuse = DELTA_BASE_REUSE_TRY
3223 else:
3223 else:
3224 delta_base_reuse = DELTA_BASE_REUSE_NO
3224 delta_base_reuse = DELTA_BASE_REUSE_NO
3225 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3225 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3226
3226
3227 revinfo = revlogutils.revisioninfo(
3227 revinfo = revlogutils.revisioninfo(
3228 node,
3228 node,
3229 p1,
3229 p1,
3230 p2,
3230 p2,
3231 btext,
3231 btext,
3232 textlen,
3232 textlen,
3233 cachedelta,
3233 cachedelta,
3234 flags,
3234 flags,
3235 )
3235 )
3236
3236
3237 deltainfo = deltacomputer.finddeltainfo(revinfo)
3237 deltainfo = deltacomputer.finddeltainfo(revinfo)
3238
3238
3239 compression_mode = COMP_MODE_INLINE
3239 compression_mode = COMP_MODE_INLINE
3240 if self._docket is not None:
3240 if self._docket is not None:
3241 default_comp = self._docket.default_compression_header
3241 default_comp = self._docket.default_compression_header
3242 r = deltautil.delta_compression(default_comp, deltainfo)
3242 r = deltautil.delta_compression(default_comp, deltainfo)
3243 compression_mode, deltainfo = r
3243 compression_mode, deltainfo = r
3244
3244
3245 sidedata_compression_mode = COMP_MODE_INLINE
3245 sidedata_compression_mode = COMP_MODE_INLINE
3246 if sidedata and self.feature_config.has_side_data:
3246 if sidedata and self.feature_config.has_side_data:
3247 sidedata_compression_mode = COMP_MODE_PLAIN
3247 sidedata_compression_mode = COMP_MODE_PLAIN
3248 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3248 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3249 sidedata_offset = self._docket.sidedata_end
3249 sidedata_offset = self._docket.sidedata_end
3250 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3250 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3251 if (
3251 if (
3252 h != b'u'
3252 h != b'u'
3253 and comp_sidedata[0:1] != b'\0'
3253 and comp_sidedata[0:1] != b'\0'
3254 and len(comp_sidedata) < len(serialized_sidedata)
3254 and len(comp_sidedata) < len(serialized_sidedata)
3255 ):
3255 ):
3256 assert not h
3256 assert not h
3257 if (
3257 if (
3258 comp_sidedata[0:1]
3258 comp_sidedata[0:1]
3259 == self._docket.default_compression_header
3259 == self._docket.default_compression_header
3260 ):
3260 ):
3261 sidedata_compression_mode = COMP_MODE_DEFAULT
3261 sidedata_compression_mode = COMP_MODE_DEFAULT
3262 serialized_sidedata = comp_sidedata
3262 serialized_sidedata = comp_sidedata
3263 else:
3263 else:
3264 sidedata_compression_mode = COMP_MODE_INLINE
3264 sidedata_compression_mode = COMP_MODE_INLINE
3265 serialized_sidedata = comp_sidedata
3265 serialized_sidedata = comp_sidedata
3266 else:
3266 else:
3267 serialized_sidedata = b""
3267 serialized_sidedata = b""
3268 # Don't store the offset if the sidedata is empty, that way
3268 # Don't store the offset if the sidedata is empty, that way
3269 # we can easily detect empty sidedata and they will be no different
3269 # we can easily detect empty sidedata and they will be no different
3270 # than ones we manually add.
3270 # than ones we manually add.
3271 sidedata_offset = 0
3271 sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
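        # In other words, rank is the size of a revision's ancestor set,
        # itself included: a root has rank 1, a linear child has its parent's
        # rank plus one, and a merge takes the larger parent's count plus
        # however many ancestors the other parent contributes that the first
        # one is missing. Hypothetical example: with fast_rank(pmax) == 10
        # and two ancestors of pmin missing from pmax's history, the merge
        # gets rank 1 + 10 + 2 = 13.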

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When it is ``None``, the destination revlog's existing
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
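                    # run_sidedata_helpers returns the new sidedata and a
                    # (flags to add, flags to remove) pair; fold both into
                    # the revision flags.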
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censor_nodes, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censor_nodes, tombstone)
        else:
            rewrite.v2_censor(self, tr, censor_nodes, tombstone)

    def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
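        # Build a dict with one key per requested piece of information:
        # b'exclusivefiles' holds (opener, filename) pairs owned by this
        # revlog, b'sharedfiles' is always empty here, b'revisionscount' is
        # the number of revisions, b'trackedsize' sums the raw revision
        # sizes and b'storedsize' sums the on-disk size of all revlog files.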
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
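        # ``helpers`` follows the structure documented in
        # `revlogutil.sidedata.get_sidedata_helpers`: helpers[1] holds the
        # sidedata computers and helpers[2] the sidedata removers.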
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    # NB: slice one byte so both comparisons below stay
                    # bytes-vs-bytes (indexing would yield an int), matching
                    # the equivalent block in _addrevision.
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)