revlog: drop the df argument to `rawdata`...

marmoute
r51916:14de1582 default
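
The change itself is two lines in filelog.py: the ``rawdata`` wrapper loses its ``_df`` keyword. As a hedged sketch of the caller-visible effect (``fl`` stands for any filelog instance; ``_df`` was an optional pre-opened data-file handle used to avoid reopening the revlog's data file, an internal optimization rather than part of the documented interface):

    # before this changeset:
    # raw = fl.rawdata(node, _df=open_data_file)
    # after it, handle management stays inside the revlog:
    raw = fl.rawdata(node)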

diff --git a/mercurial/filelog.py b/mercurial/filelog.py
@@ -1,313 +1,313 @@
# filelog.py - file history class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from .i18n import _
from .node import nullrev
from . import (
    error,
    revlog,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .utils import storageutil
from .revlogutils import (
    constants as revlog_constants,
    rewrite,
)


@interfaceutil.implementer(repository.ifilestorage)
class filelog:
    def __init__(self, opener, path, try_split=False):
        self._revlog = revlog.revlog(
            opener,
            # XXX should use the unencoded path
            target=(revlog_constants.KIND_FILELOG, path),
            radix=b'/'.join((b'data', path)),
            censorable=True,
            canonical_parent_order=False,  # see comment in revlog.py
            try_split=try_split,
        )
        # Full name of the user visible file, relative to the repository root.
        # Used by LFS.
        self._revlog.filename = path
        self.nullid = self._revlog.nullid
        opts = opener.options
        self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)

    def get_revlog(self):
        """return an actual revlog instance if any

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving a simple
        way to access the revlog instance helps such code.
        """
        return self._revlog

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def hasnode(self, node):
        if node in (self.nullid, nullrev):
            return False

        try:
            self._revlog.rev(node)
            return True
        except (TypeError, ValueError, IndexError, error.LookupError):
            return False

    def revs(self, start=0, stop=None):
        return self._revlog.revs(start=start, stop=stop)

    def parents(self, node):
        return self._revlog.parents(node)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, node):
        return storageutil.fileidlookup(
            self._revlog, node, self._revlog.display_id
        )

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def commonancestorsheads(self, node1, node2):
        return self._revlog.commonancestorsheads(node1, node2)

    # Used by dagop.blockdescendants().
    def descendants(self, revs):
        return self._revlog.descendants(revs)

    def heads(self, start=None, stop=None):
        return self._revlog.heads(start, stop)

    # Used by hgweb, children extension.
    def children(self, node):
        return self._revlog.children(node)

    def iscensored(self, rev):
        return self._revlog.iscensored(rev)

    def revision(self, node):
        return self._revlog.revision(node)

-    def rawdata(self, node, _df=None):
-        return self._revlog.rawdata(node, _df=_df)
+    def rawdata(self, node):
+        return self._revlog.rawdata(node)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    def addrevision(
        self,
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=revlog.REVIDX_DEFAULT_FLAGS,
        cachedelta=None,
    ):
        return self._revlog.addrevision(
            revisiondata,
            transaction,
            linkrev,
            p1,
            p2,
            node=node,
            flags=flags,
            cachedelta=cachedelta,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        if maybemissingparents:
            raise error.Abort(
                _(
                    b'revlog storage does not support missing '
                    b'parents write mode'
                )
            )

        with self._revlog._writing(transaction):

            if self._fix_issue6528:
                deltas = rewrite.filter_delta_issue6528(self._revlog, deltas)

            return self._revlog.addgroup(
                deltas,
                linkmapper,
                transaction,
                addrevisioncb=addrevisioncb,
                duplicaterevisioncb=duplicaterevisioncb,
                debug_info=debug_info,
                delta_base_reuse_policy=delta_base_reuse_policy,
            )

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def censorrevision(self, tr, node, tombstone=b''):
        return self._revlog.censorrevision(tr, node, tombstone=tombstone)

    def files(self):
        return self._revlog.files()

    def read(self, node):
        return storageutil.filtermetadata(self.revision(node))

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        if meta or text.startswith(b'\1\n'):
            text = storageutil.packmeta(meta, text)
        rev = self.addrevision(text, transaction, link, p1, p2)
        return self.node(rev)

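    # A hedged sketch of the in-band metadata framing used by add() and
    # read() above: copy metadata is packed between two "\1\n" markers in
    # front of the file text (the exact key names are illustrative):
    #
    #   meta = {b'copy': b'old/path', b'copyrev': b'<40 hex chars>'}
    #   stored = storageutil.packmeta(meta, b'content')
    #   # stored begins with b'\x01\ncopy: old/path\ncopyrev: ...\n\x01\n'
    #   assert storageutil.filtermetadata(stored) == b'content'
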
    def renamed(self, node):
        return storageutil.filerevisioncopied(self, node)

    def size(self, rev):
        """return the size of a given revision"""

        # for revisions with renames, we have to go the slow way
        node = self.node(rev)
        if self.iscensored(rev):
            return 0
        if self.renamed(node):
            return len(self.read(node))

        # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
        # XXX See also basefilectx.cmp.
        return self._revlog.size(rev)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        return not storageutil.filedataequivalent(self, node, text)

    def verifyintegrity(self, state):
        return self._revlog.verifyintegrity(state)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    # Used by repo upgrade.
    def clone(self, tr, destrevlog, **kwargs):
        if not isinstance(destrevlog, filelog):
            msg = b'expected filelog to clone(), not %r'
            msg %= destrevlog
            raise error.ProgrammingError(msg)

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)


class narrowfilelog(filelog):
    """Filelog variation to be used with narrow stores."""

    def __init__(self, opener, path, narrowmatch, try_split=False):
        super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
        self._narrowmatch = narrowmatch

    def renamed(self, node):
        res = super(narrowfilelog, self).renamed(node)

        # Renames that come from outside the narrowspec are problematic
        # because we may lack the base text for the rename. This can result
        # in code attempting to walk the ancestry or compute a diff
        # encountering a missing revision. We address this by silently
        # removing rename metadata if the source file is outside the
        # narrow spec.
        #
        # A better solution would be to see if the base revision is available,
        # rather than assuming it isn't.
        #
        # An even better solution would be to teach all consumers of rename
        # metadata that the base revision may not be available.
        #
        # TODO consider better ways of doing this.
        if res and not self._narrowmatch(res[0]):
            return None

        return res

    def size(self, rev):
        # Because we have a custom renamed() that may lie, we need to call
        # the base renamed() to report accurate results.
        node = self.node(rev)
        if super(narrowfilelog, self).renamed(node):
            return len(self.read(node))
        else:
            return super(narrowfilelog, self).size(rev)

    def cmp(self, node, text):
        # We don't call `super` because narrow parents can be buggy in case
        # of an ambiguous dirstate. Always take the slow path until there is
        # a better fix, see issue6150.

        # Censored files compare against the empty file.
        if self.iscensored(self.rev(node)):
            return text != b''

        return self.read(node) != text

diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py
@@ -1,2079 +1,2079 @@
# repository.py - Interfaces and base classes for repositories and peers.
# coding: utf-8
#
# Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from ..i18n import _
from .. import error
from . import util as interfaceutil

# Local repository feature string.

# Revlogs are being used for file storage.
REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
# The storage part of the repository is shared from an external source.
REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
# LFS supported for backing file storage.
REPO_FEATURE_LFS = b'lfs'
# Repository supports being stream cloned.
REPO_FEATURE_STREAM_CLONE = b'streamclone'
# Repository supports (at least) some sidedata to be stored
REPO_FEATURE_SIDE_DATA = b'side-data'
# Files storage may lack data for all ancestors.
REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'

REVISION_FLAG_CENSORED = 1 << 15
REVISION_FLAG_ELLIPSIS = 1 << 14
REVISION_FLAG_EXTSTORED = 1 << 13
REVISION_FLAG_HASCOPIESINFO = 1 << 12

REVISION_FLAGS_KNOWN = (
    REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO
)
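# A hedged usage sketch for the flag constants above (``flags`` is a
# hypothetical 2-byte value read from a revision):
#
#   if flags & REVISION_FLAG_CENSORED:
#       ...  # the revision's content was censored
#   if flags & ~REVISION_FLAGS_KNOWN:
#       ...  # refuse: the revision carries flags we do not understand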

CG_DELTAMODE_STD = b'default'
CG_DELTAMODE_PREV = b'previous'
CG_DELTAMODE_FULL = b'fulltext'
CG_DELTAMODE_P1 = b'p1'


## Cache related constants:
#
# Used to control which cache should be warmed in a repo.updatecaches(…) call.

# Warm branchmaps of all known repoview's filter-level
CACHE_BRANCHMAP_ALL = b"branchmap-all"
# Warm branchmaps of repoview's filter-level used by server
CACHE_BRANCHMAP_SERVED = b"branchmap-served"
# Warm internal changelog cache (eg: persistent nodemap)
CACHE_CHANGELOG_CACHE = b"changelog-cache"
# Warm full manifest cache
CACHE_FULL_MANIFEST = b"full-manifest"
# Warm file-node-tags cache
CACHE_FILE_NODE_TAGS = b"file-node-tags"
# Warm internal manifestlog cache (eg: persistent nodemap)
CACHE_MANIFESTLOG_CACHE = b"manifestlog-cache"
# Warm rev branch cache
CACHE_REV_BRANCH = b"rev-branch-cache"
# Warm tags' cache for the default repoview
CACHE_TAGS_DEFAULT = b"tags-default"
# Warm tags' cache for repoview's filter-level used by server
CACHE_TAGS_SERVED = b"tags-served"

# the caches to warm by default after a simple transaction
# (this is a mutable set to let extensions update it)
CACHES_DEFAULT = {
    CACHE_BRANCHMAP_SERVED,
}

# the caches to warm when warming all of them
# (this is a mutable set to let extensions update it)
CACHES_ALL = {
    CACHE_BRANCHMAP_SERVED,
    CACHE_BRANCHMAP_ALL,
    CACHE_CHANGELOG_CACHE,
    CACHE_FILE_NODE_TAGS,
    CACHE_FULL_MANIFEST,
    CACHE_MANIFESTLOG_CACHE,
    CACHE_TAGS_DEFAULT,
    CACHE_TAGS_SERVED,
}

# the caches to warm by default after a clone
# (this is a mutable set to let extensions update it)
CACHES_POST_CLONE = CACHES_ALL.copy()
CACHES_POST_CLONE.discard(CACHE_FILE_NODE_TAGS)
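# Since the sets above are intentionally mutable, an extension could opt an
# extra cache into the default transaction warm-up; a hypothetical sketch:
#
#   from mercurial.interfaces import repository
#   repository.CACHES_DEFAULT.add(repository.CACHE_TAGS_SERVED)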


class ipeerconnection(interfaceutil.Interface):
    """Represents a "connection" to a repository.

    This is the base interface for representing a connection to a repository.
    It holds basic properties and methods applicable to all peer types.

    This is not a complete interface definition and should not be used
    outside of this module.
    """

    ui = interfaceutil.Attribute("""ui.ui instance""")
    path = interfaceutil.Attribute("""a urlutil.path instance or None""")

    def url():
        """Returns a URL string representing this peer.

        Currently, implementations expose the raw URL used to construct the
        instance. It may contain credentials as part of the URL. The
        expectations of the value aren't well-defined and this could lead to
        data leakage.

        TODO audit/clean consumers and more clearly define the contents of this
        value.
        """

    def local():
        """Returns a local repository instance.

        If the peer represents a local repository, returns an object that
        can be used to interface with it. Otherwise returns ``None``.
        """

    def canpush():
        """Returns a boolean indicating if this peer can be pushed to."""

    def close():
        """Close the connection to this peer.

        This is called when the peer will no longer be used. Resources
        associated with the peer should be cleaned up.
        """


class ipeercapabilities(interfaceutil.Interface):
    """Peer sub-interface related to capabilities."""

    def capable(name):
        """Determine support for a named capability.

        Returns ``False`` if capability not supported.

        Returns ``True`` if boolean capability is supported. Returns a string
        if capability support is non-boolean.

        Capability strings may or may not map to wire protocol capabilities.
        """

    def requirecap(name, purpose):
        """Require a capability to be present.

        Raises a ``CapabilityError`` if the capability isn't present.
        """


class ipeercommands(interfaceutil.Interface):
    """Client-side interface for communicating over the wire protocol.

    This interface is used as a gateway to the Mercurial wire protocol.
    Methods commonly call wire protocol commands of the same name.
    """

    def branchmap():
        """Obtain heads in named branches.

        Returns a dict mapping branch name to an iterable of nodes that are
        heads on that branch.
        """

    def capabilities():
        """Obtain capabilities of the peer.

        Returns a set of string capabilities.
        """

    def get_cached_bundle_inline(path):
        """Retrieve a clonebundle across the wire.

        Returns a chunkbuffer.
        """

    def clonebundles():
        """Obtains the clone bundles manifest for the repo.

        Returns the manifest as unparsed bytes.
        """

    def debugwireargs(one, two, three=None, four=None, five=None):
        """Used to facilitate debugging of arguments passed over the wire."""

    def getbundle(source, **kwargs):
        """Obtain remote repository data as a bundle.

        This command is how the bulk of repository data is transferred from
        the peer to the local repository.

        Returns a generator of bundle data.
        """

    def heads():
        """Determine all known head revisions in the peer.

        Returns an iterable of binary nodes.
        """

    def known(nodes):
        """Determine whether multiple nodes are known.

        Accepts an iterable of nodes whose presence to check for.

        Returns an iterable of booleans indicating whether the corresponding
        node at that index is known to the peer.
        """

    def listkeys(namespace):
        """Obtain all keys in a pushkey namespace.

        Returns an iterable of key names.
        """

    def lookup(key):
        """Resolve a value to a known revision.

        Returns a binary node of the resolved revision on success.
        """

    def pushkey(namespace, key, old, new):
        """Set a value using the ``pushkey`` protocol.

        Arguments correspond to the pushkey namespace and key to operate on and
        the old and new values for that key.

        Returns a string with the peer result. The value inside varies by the
        namespace.
        """

    def stream_out():
        """Obtain streaming clone data.

        Successful result should be a generator of data chunks.
        """

    def unbundle(bundle, heads, url):
        """Transfer repository data to the peer.

        This is how the bulk of data during a push is transferred.

        Returns the integer number of heads added to the peer.
        """


class ipeerlegacycommands(interfaceutil.Interface):
    """Interface for implementing support for legacy wire protocol commands.

    Wire protocol commands transition to legacy status when they are no longer
    used by modern clients. To facilitate identifying which commands are
    legacy, the interfaces are split.
    """

    def between(pairs):
        """Obtain nodes between pairs of nodes.

        ``pairs`` is an iterable of node pairs.

        Returns an iterable of iterables of nodes corresponding to each
        requested pair.
        """

    def branches(nodes):
        """Obtain ancestor changesets of specific nodes back to a branch point.

        For each requested node, the peer finds the first ancestor node that is
        a DAG root or is a merge.

        Returns an iterable of iterables with the resolved values for each node.
        """

    def changegroup(nodes, source):
        """Obtain a changegroup with data for descendants of specified nodes."""

    def changegroupsubset(bases, heads, source):
        pass


class ipeercommandexecutor(interfaceutil.Interface):
    """Represents a mechanism to execute remote commands.

    This is the primary interface for requesting that wire protocol commands
    be executed. Instances of this interface are active in a context manager
    and have a well-defined lifetime. When the context manager exits, all
    outstanding requests are waited on.
    """

    def callcommand(name, args):
        """Request that a named command be executed.

        Receives the command name and a dictionary of command arguments.

        Returns a ``concurrent.futures.Future`` that will resolve to the
        result of that command request. That exact value is left up to
        the implementation and possibly varies by command.

        Not all commands can coexist with other commands in an executor
        instance: it depends on the underlying wire protocol transport being
        used and the command itself.

        Implementations MAY call ``sendcommands()`` automatically if the
        requested command can not coexist with other commands in this executor.

        Implementations MAY call ``sendcommands()`` automatically when the
        future's ``result()`` is called. So, consumers using multiple
        commands with an executor MUST ensure that ``result()`` is not called
        until all command requests have been issued.
        """

    def sendcommands():
        """Trigger submission of queued command requests.

        Not all transports submit commands as soon as they are requested to
        run. When called, this method forces queued command requests to be
        issued. It will no-op if all commands have already been sent.

        When called, no more new commands may be issued with this executor.
        """

    def close():
        """Signal that this command request is finished.

        When called, no more new commands may be issued. All outstanding
        commands that have previously been issued are waited on before
        returning. This not only includes waiting for the futures to resolve,
        but also waiting for all response data to arrive. In other words,
        calling this waits for all on-wire state for issued command requests
        to finish.

        When used as a context manager, this method is called when exiting the
        context manager.

        This method may call ``sendcommands()`` if there are buffered commands.
        """


class ipeerrequests(interfaceutil.Interface):
    """Interface for executing commands on a peer."""

    limitedarguments = interfaceutil.Attribute(
        """True if the peer cannot receive large argument values for commands."""
    )

    def commandexecutor():
        """A context manager that resolves to an ipeercommandexecutor.

        The object this resolves to can be used to issue command requests
        to the peer.

        Callers should call its ``callcommand`` method to issue command
        requests.

        A new executor should be obtained for each distinct set of commands
        (possibly just a single command) that the consumer wants to execute
        as part of a single operation or round trip. This is because some
        peers are half-duplex and/or don't support persistent connections.
        e.g. in the case of HTTP peers, commands sent to an executor represent
        a single HTTP request. While some peers may support multiple command
        sends over the wire per executor, consumers need to code to the least
        capable peer. So it should be assumed that command executors buffer
        called commands until they are told to send them and that each
        command executor could result in a new connection or wire-level request
        being issued.
        """
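    # A hedged sketch of the round-trip pattern described above (``peer`` is
    # any object implementing this interface):
    #
    #   with peer.commandexecutor() as e:
    #       f = e.callcommand(b'heads', {})
    #   heads = f.result()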


class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
    """Unified interface for peer repositories.

    All peer instances must conform to this interface.
    """


class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
    """Unified peer interface for wire protocol version 2 peers."""

    apidescriptor = interfaceutil.Attribute(
        """Data structure holding description of server API."""
    )


@interfaceutil.implementer(ipeerbase)
class peer:
    """Base class for peer repositories."""

    limitedarguments = False

    def __init__(self, ui, path=None, remotehidden=False):
        self.ui = ui
        self.path = path

    def capable(self, name):
        caps = self.capabilities()
        if name in caps:
            return True

        name = b'%s=' % name
        for cap in caps:
            if cap.startswith(name):
                return cap[len(name) :]

        return False
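    # A hedged sketch of the two result shapes of capable() (the capability
    # set is hypothetical):
    #
    #   caps = {b'lookup', b'bundle2=HG20...'}
    #   peer.capable(b'lookup')   # -> True (boolean capability)
    #   peer.capable(b'bundle2')  # -> b'HG20...' (string-valued capability)
    #   peer.capable(b'unknown')  # -> False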

    def requirecap(self, name, purpose):
        if self.capable(name):
            return

        raise error.CapabilityError(
            _(
                b'cannot %s; remote repository does not support the '
                b'\'%s\' capability'
            )
            % (purpose, name)
        )


class iverifyproblem(interfaceutil.Interface):
    """Represents a problem with the integrity of the repository.

    Instances of this interface are emitted to describe an integrity issue
    with a repository (e.g. corrupt storage, missing data, etc).

    Instances are essentially messages associated with severity.
    """

    warning = interfaceutil.Attribute(
        """Message indicating a non-fatal problem."""
    )

    error = interfaceutil.Attribute("""Message indicating a fatal problem.""")

    node = interfaceutil.Attribute(
        """Revision encountering the problem.

        ``None`` means the problem doesn't apply to a single revision.
        """
    )


class irevisiondelta(interfaceutil.Interface):
    """Represents a delta between one revision and another.

    Instances convey enough information to allow a revision to be exchanged
    with another repository.

    Instances represent the fulltext revision data or a delta against
    another revision. Therefore the ``revision`` and ``delta`` attributes
    are mutually exclusive.

    Typically used for changegroup generation.
    """

    node = interfaceutil.Attribute("""20 byte node of this revision.""")

    p1node = interfaceutil.Attribute(
        """20 byte node of 1st parent of this revision."""
    )

    p2node = interfaceutil.Attribute(
        """20 byte node of 2nd parent of this revision."""
    )

    linknode = interfaceutil.Attribute(
        """20 byte node of the changelog revision this node is linked to."""
    )

    flags = interfaceutil.Attribute(
        """2 bytes of integer flags that apply to this revision.

        This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
        """
    )

    basenode = interfaceutil.Attribute(
        """20 byte node of the revision this data is a delta against.

        ``nullid`` indicates that the revision is a full revision and not
        a delta.
        """
    )

    baserevisionsize = interfaceutil.Attribute(
        """Size of base revision this delta is against.

        May be ``None`` if ``basenode`` is ``nullid``.
        """
    )

    revision = interfaceutil.Attribute(
        """Raw fulltext of revision data for this node."""
    )

    delta = interfaceutil.Attribute(
        """Delta between ``basenode`` and ``node``.

        Stored in the bdiff delta format.
        """
    )

    sidedata = interfaceutil.Attribute(
        """Raw sidedata bytes for the given revision."""
    )

    protocol_flags = interfaceutil.Attribute(
        """Single byte of integer flags that can influence the protocol.

        This is a bitwise composition of the ``storageutil.CG_FLAG*`` constants.
        """
    )


class ifilerevisionssequence(interfaceutil.Interface):
    """Contains index data for all revisions of a file.

    Types implementing this behave like lists of tuples. The index
    in the list corresponds to the revision number. The values contain
    index metadata.

    The *null* revision (revision number -1) is always the last item
    in the index.
    """

    def __len__():
        """The total number of revisions."""

    def __getitem__(rev):
        """Returns the object having a specific revision number.

        Returns an 8-tuple with the following fields:

        offset+flags
            Contains the offset and flags for the revision. 64-bit unsigned
            integer where first 6 bytes are the offset and the next 2 bytes
            are flags. The offset can be 0 if it is not used by the store.
        compressed size
            Size of the revision data in the store. It can be 0 if it isn't
            needed by the store.
        uncompressed size
            Fulltext size. It can be 0 if it isn't needed by the store.
        base revision
            Revision number of revision the delta for storage is encoded
            against. -1 indicates not encoded against a base revision.
        link revision
            Revision number of changelog revision this entry is related to.
        p1 revision
            Revision number of 1st parent. -1 if no 1st parent.
        p2 revision
            Revision number of 2nd parent. -1 if no 2nd parent.
        node
            Binary node value for this revision number.

        Negative values should index off the end of the sequence. ``-1``
        should return the null revision. ``-2`` should return the most
        recent revision.
        """
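    # A hedged sketch of unpacking the 8-tuple described above (``index`` is
    # any object implementing this interface):
    #
    #   offset_flags, comp, raw, base, link, p1, p2, node = index[rev]
    #   offset = offset_flags >> 16     # upper 6 bytes
    #   flags = offset_flags & 0xFFFF   # lower 2 bytes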
565
565
566 def __contains__(rev):
566 def __contains__(rev):
567 """Whether a revision number exists."""
567 """Whether a revision number exists."""
568
568
569 def insert(self, i, entry):
569 def insert(self, i, entry):
570 """Add an item to the index at specific revision."""
570 """Add an item to the index at specific revision."""
571
571
572
572
573 class ifileindex(interfaceutil.Interface):
573 class ifileindex(interfaceutil.Interface):
574 """Storage interface for index data of a single file.
574 """Storage interface for index data of a single file.
575
575
576 File storage data is divided into index metadata and data storage.
576 File storage data is divided into index metadata and data storage.
577 This interface defines the index portion of the interface.
577 This interface defines the index portion of the interface.
578
578
579 The index logically consists of:
579 The index logically consists of:
580
580
581 * A mapping between revision numbers and nodes.
581 * A mapping between revision numbers and nodes.
582 * DAG data (storing and querying the relationship between nodes).
582 * DAG data (storing and querying the relationship between nodes).
583 * Metadata to facilitate storage.
583 * Metadata to facilitate storage.
584 """
584 """
585
585
586 nullid = interfaceutil.Attribute(
586 nullid = interfaceutil.Attribute(
587 """node for the null revision for use as delta base."""
587 """node for the null revision for use as delta base."""
588 )
588 )
589
589
590 def __len__():
590 def __len__():
591 """Obtain the number of revisions stored for this file."""
591 """Obtain the number of revisions stored for this file."""
592
592
593 def __iter__():
593 def __iter__():
594 """Iterate over revision numbers for this file."""
594 """Iterate over revision numbers for this file."""
595
595
596 def hasnode(node):
596 def hasnode(node):
597 """Returns a bool indicating if a node is known to this store.
597 """Returns a bool indicating if a node is known to this store.
598
598
599 Implementations must only return True for full, binary node values:
599 Implementations must only return True for full, binary node values:
600 hex nodes, revision numbers, and partial node matches must be
600 hex nodes, revision numbers, and partial node matches must be
601 rejected.
601 rejected.
602
602
603 The null node is never present.
603 The null node is never present.
604 """
604 """

    def revs(start=0, stop=None):
        """Iterate over revision numbers for this file, with control."""

    def parents(node):
        """Returns a 2-tuple of parent nodes for a revision.

        Values will be ``nullid`` if the parent is empty.
        """

    def parentrevs(rev):
        """Like parents() but operates on revision numbers."""

    def rev(node):
        """Obtain the revision number given a node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def node(rev):
        """Obtain the node value given a revision number.

        Raises ``IndexError`` if the revision is not known.
        """

    def lookup(node):
        """Attempt to resolve a value to a node.

        Value can be a binary node, hex node, revision number, or a string
        that can be converted to an integer.

        Raises ``error.LookupError`` if a node could not be resolved.
        """
638
638
639 def linkrev(rev):
639 def linkrev(rev):
640 """Obtain the changeset revision number a revision is linked to."""
640 """Obtain the changeset revision number a revision is linked to."""
641
641
642 def iscensored(rev):
642 def iscensored(rev):
643 """Return whether a revision's content has been censored."""
643 """Return whether a revision's content has been censored."""
644
644
645 def commonancestorsheads(node1, node2):
645 def commonancestorsheads(node1, node2):
646 """Obtain an iterable of nodes containing heads of common ancestors.
646 """Obtain an iterable of nodes containing heads of common ancestors.
647
647
648 See ``ancestor.commonancestorsheads()``.
648 See ``ancestor.commonancestorsheads()``.
649 """
649 """
650
650
651 def descendants(revs):
651 def descendants(revs):
652 """Obtain descendant revision numbers for a set of revision numbers.
652 """Obtain descendant revision numbers for a set of revision numbers.
653
653
654 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
654 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
655 """
655 """
656
656
657 def heads(start=None, stop=None):
657 def heads(start=None, stop=None):
658 """Obtain a list of nodes that are DAG heads, with control.
658 """Obtain a list of nodes that are DAG heads, with control.
659
659
660 The set of revisions examined can be limited by specifying
660 The set of revisions examined can be limited by specifying
661 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
661 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
662 iterable of nodes. DAG traversal starts at earlier revision
662 iterable of nodes. DAG traversal starts at earlier revision
663 ``start`` and iterates forward until any node in ``stop`` is
663 ``start`` and iterates forward until any node in ``stop`` is
664 encountered.
664 encountered.
665 """
665 """
666
666
667 def children(node):
667 def children(node):
668 """Obtain nodes that are children of a node.
668 """Obtain nodes that are children of a node.
669
669
670 Returns a list of nodes.
670 Returns a list of nodes.
671 """
671 """
672
672
673
673
674 class ifiledata(interfaceutil.Interface):
674 class ifiledata(interfaceutil.Interface):
675 """Storage interface for data storage of a specific file.
675 """Storage interface for data storage of a specific file.
676
676
677 This complements ``ifileindex`` and provides an interface for accessing
677 This complements ``ifileindex`` and provides an interface for accessing
678 data for a tracked file.
678 data for a tracked file.
679 """
679 """
680
680
681 def size(rev):
681 def size(rev):
682 """Obtain the fulltext size of file data.
682 """Obtain the fulltext size of file data.
683
683
684 Any metadata is excluded from size measurements.
684 Any metadata is excluded from size measurements.
685 """
685 """
686
686
687 def revision(node):
687 def revision(node):
688 """Obtain fulltext data for a node.
688 """Obtain fulltext data for a node.
689
689
690 By default, any storage transformations are applied before the data
690 By default, any storage transformations are applied before the data
691 is returned. If ``raw`` is True, non-raw storage transformations
691 is returned. If ``raw`` is True, non-raw storage transformations
692 are not applied.
692 are not applied.
693
693
694 The fulltext data may contain a header containing metadata. Most
694 The fulltext data may contain a header containing metadata. Most
695 consumers should use ``read()`` to obtain the actual file data.
695 consumers should use ``read()`` to obtain the actual file data.
696 """
696 """
697
697
698 def rawdata(node):
698 def rawdata(node):
699 """Obtain raw data for a node."""
699 """Obtain raw data for a node."""
700
700
701 def read(node):
701 def read(node):
702 """Resolve file fulltext data.
702 """Resolve file fulltext data.
703
703
704 This is similar to ``revision()`` except any metadata in the data
704 This is similar to ``revision()`` except any metadata in the data
705 headers is stripped.
705 headers is stripped.
706 """
706 """
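
    # Illustrative sketch (not part of the interface) of how the three
    # accessors relate for a hypothetical store ``fl``:
    #
    #   fl.rawdata(node)    # bytes as stored, no transformations applied
    #   fl.revision(node)   # fulltext, possibly with a metadata header
    #   fl.read(node)       # fulltext with any metadata header stripped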

    def renamed(node):
        """Obtain copy metadata for a node.

        Returns ``False`` if no copy metadata is stored or a 2-tuple of
        (path, node) from which this revision was copied.
        """
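
    # Illustrative sketch (not part of the interface): callers typically
    # test the return value before unpacking; ``fl`` is hypothetical.
    #
    #   copied = fl.renamed(node)
    #   if copied:
    #       srcpath, srcnode = copied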

    def cmp(node, fulltext):
        """Compare fulltext to another revision.

        Returns True if the fulltext is different from what is stored.

        This takes copy metadata into account.

        TODO better document the copy metadata and censoring logic.
        """

    def emitrevisions(
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=CG_DELTAMODE_STD,
    ):
        """Produce ``irevisiondelta`` for revisions.

        Given an iterable of nodes, emits objects conforming to the
        ``irevisiondelta`` interface that describe revisions in storage.

        This method is a generator.

        The input nodes may be unordered. Implementations must ensure that a
        node's parents are emitted before the node itself. Transitively, this
        means that a node may only be emitted once all its ancestors in
        ``nodes`` have also been emitted.

        By default, emits "index" data (the ``node``, ``p1node``, and
        ``p2node`` attributes). If ``revisiondata`` is set, revision data
        will also be present on the emitted objects.

        With default argument values, implementations can choose to emit
        either fulltext revision data or a delta. When emitting deltas,
        implementations must consider whether the delta's base revision
        fulltext is available to the receiver.

        The base revision fulltext is guaranteed to be available if any of
        the following are met:

        * Its fulltext revision was emitted by this method call.
        * A delta for that revision was emitted by this method call.
        * ``assumehaveparentrevisions`` is True and the base revision is a
          parent of the node.

        ``nodesorder`` can be used to control the order that revisions are
        emitted. By default, revisions can be reordered as long as they are
        in DAG topological order (see above). If the value is ``nodes``,
        the iteration order from ``nodes`` should be used. If the value is
        ``storage``, then the native order from the backing storage layer
        is used. (Not all storage layers will have strong ordering, and the
        behavior of this mode is storage-dependent.) ``nodes`` ordering can
        force revisions to be emitted before their ancestors, so consumers
        should use it with care.

        The ``linknode`` attribute on the returned ``irevisiondelta`` may not
        be set and it is the caller's responsibility to resolve it, if needed.

        If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
        all revision data should be emitted as deltas against the revision
        emitted just prior. The initial revision should be a delta against its
        1st parent.
        """
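
    # Illustrative sketch (not part of the interface) of a consumer loop;
    # ``store``, ``nodes`` and ``process`` are hypothetical:
    #
    #   for rev in store.emitrevisions(nodes, revisiondata=True):
    #       # rev.node, rev.p1node and rev.p2node are always populated;
    #       # with revisiondata=True the payload arrives as either
    #       # rev.revision (fulltext) or rev.delta against rev.basenode.
    #       process(rev)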


class ifilemutation(interfaceutil.Interface):
    """Storage interface for mutation events of a tracked file."""

    def add(filedata, meta, transaction, linkrev, p1, p2):
        """Add a new revision to the store.

        Takes file data, dictionary of metadata, a transaction, linkrev,
        and parent nodes.

        Returns the node that was added.

        May no-op if a revision matching the supplied data is already stored.
        """

    def addrevision(
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=0,
        cachedelta=None,
    ):
        """Add a new revision to the store and return its number.

        This is similar to ``add()`` except it operates at a lower level.

        The data passed in already contains a metadata header, if any.

        ``node`` and ``flags`` can be used to define the expected node and
        the flags to use with storage. ``flags`` is a bitwise value composed
        of the various ``REVISION_FLAG_*`` constants.

        ``add()`` is usually called when adding files from e.g. the working
        directory. ``addrevision()`` is often called by ``add()`` and for
        scenarios where revision data has already been computed, such as when
        applying raw data from a peer repo.
        """

    def addgroup(
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
    ):
        """Process a series of deltas for storage.

        ``deltas`` is an iterable of 7-tuples of
        (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
        to add.

        The ``delta`` field contains ``mpatch`` data to apply to a base
        revision, identified by ``deltabase``. The base node can be
        ``nullid``, in which case the header from the delta can be ignored
        and the delta used as the fulltext.

        ``alwayscache`` instructs the lower layers to cache the content of the
        newly added revision, even if it needs to be explicitly computed.
        This used to be the default when ``addrevisioncb`` was provided up to
        Mercurial 5.8.

        ``addrevisioncb`` should be called for each new rev as it is committed.
        ``duplicaterevisioncb`` should be called for all revs with a
        pre-existing node.

        ``maybemissingparents`` is a bool indicating whether the incoming
        data may reference parents/ancestor revisions that aren't present.
        This flag is set when receiving data into a "shallow" store that
        doesn't hold all history.

        Returns a list of nodes that were processed. A node will be in the list
        even if it existed in the store previously.
        """
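
    # Illustrative sketch (not part of the interface) of one ``deltas``
    # entry; all names are hypothetical:
    #
    #   deltas = [
    #       (node, p1, p2, linknode, deltabase, delta, flags),
    #   ]
    #   store.addgroup(deltas, linkmapper, transaction)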

    def censorrevision(tr, node, tombstone=b''):
        """Remove the content of a single revision.

        The specified ``node`` will have its content purged from storage.
        Future attempts to access the revision data for this node will
        result in failure.

        A ``tombstone`` message can optionally be stored. This message may be
        displayed to users when they attempt to access the missing revision
        data.

        Storage backends may have stored deltas against the previous content
        in this revision. As part of censoring a revision, these storage
        backends are expected to rewrite any internally stored deltas such
        that they no longer reference the deleted content.
        """

    def getstrippoint(minlink):
        """Find the minimum revision that must be stripped to strip a linkrev.

        Returns a 2-tuple containing the minimum revision number and a set
        of all revision numbers that would be broken by this strip.

        TODO this is highly revlog centric and should be abstracted into
        a higher-level deletion API. ``repair.strip()`` relies on this.
        """

    def strip(minlink, transaction):
        """Remove storage of items starting at a linkrev.

        This uses ``getstrippoint()`` to determine the first node to remove.
        Then it effectively truncates storage for all revisions after that.

        TODO this is highly revlog centric and should be abstracted into a
        higher-level deletion API.
        """
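
    # Illustrative sketch (not part of the interface): ``strip()`` is
    # effectively ``getstrippoint()`` followed by truncation; ``store`` is
    # hypothetical.
    #
    #   striprev, brokenrevs = store.getstrippoint(minlink)
    #   if striprev < len(store):
    #       store.strip(minlink, transaction)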


class ifilestorage(ifileindex, ifiledata, ifilemutation):
    """Complete storage interface for a single tracked file."""

    def files():
        """Obtain paths that are backing storage for this file.

        TODO this is used heavily by verify code and there should probably
        be a better API for that.
        """

    def storageinfo(
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        """Obtain information about storage for this file's data.

        Returns a dict describing storage for this tracked path. The keys
        in the dict map to arguments of the same name. The arguments are
        bools indicating whether to calculate and obtain that data.

        exclusivefiles
           Iterable of (vfs, path) describing files that are exclusively
           used to back storage for this tracked path.

        sharedfiles
           Iterable of (vfs, path) describing files that are used to back
           storage for this tracked path. Those files may also provide storage
           for other stored entities.

        revisionscount
           Number of revisions available for retrieval.

        trackedsize
           Total size in bytes of all tracked revisions. This is a sum of the
           length of the fulltext of all revisions.

        storedsize
           Total size in bytes used to store data for all tracked revisions.
           This is commonly less than ``trackedsize`` due to internal usage
           of deltas rather than fulltext revisions.

        Not all storage backends may support all queries or have a reasonable
        value to use. In that case, the value should be set to ``None`` and
        callers are expected to handle this special value.
        """
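
    # Illustrative sketch (not part of the interface): the returned dict
    # mirrors the flags that were requested; ``store`` is hypothetical and
    # the exact key type is implementation-defined.
    #
    #   info = store.storageinfo(revisionscount=True, trackedsize=True)
    #   # info maps the requested names to ints, or None if the backend
    #   # cannot compute a reasonable value.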

    def verifyintegrity(state):
        """Verifies the integrity of file storage.

        ``state`` is a dict holding state of the verifier process. It can be
        used to communicate data between invocations of multiple storage
        primitives.

        If individual revisions cannot have their revision content resolved,
        the method is expected to set the ``skipread`` key to a set of nodes
        that encountered problems. If set, the method can also add the node(s)
        to ``safe_renamed`` in order to indicate nodes that may perform the
        rename checks with currently accessible data.

        The method yields objects conforming to the ``iverifyproblem``
        interface.
        """


class idirs(interfaceutil.Interface):
    """Interface representing a collection of directories from paths.

    This interface is essentially a derived data structure representing
    directories from a collection of paths.
    """

    def addpath(path):
        """Add a path to the collection.

        All directories in the path will be added to the collection.
        """

    def delpath(path):
        """Remove a path from the collection.

        If the removal was the last path in a particular directory, the
        directory is removed from the collection.
        """

    def __iter__():
        """Iterate over the directories in this collection of paths."""

    def __contains__(path):
        """Whether a specific directory is in this collection."""


class imanifestdict(interfaceutil.Interface):
    """Interface representing a manifest data structure.

    A manifest is effectively a dict mapping paths to entries. Each entry
    consists of a binary node and extra flags affecting that entry.
    """

    def __getitem__(path):
        """Returns the binary node value for a path in the manifest.

        Raises ``KeyError`` if the path does not exist in the manifest.

        Equivalent to ``self.find(path)[0]``.
        """

    def find(path):
        """Returns the entry for a path in the manifest.

        Returns a 2-tuple of (node, flags).

        Raises ``KeyError`` if the path does not exist in the manifest.
        """

    def __len__():
        """Return the number of entries in the manifest."""

    def __nonzero__():
        """Returns True if the manifest has entries, False otherwise."""

    __bool__ = __nonzero__

    def __setitem__(path, node):
        """Define the node value for a path in the manifest.

        If the path is already in the manifest, its flags will be copied to
        the new entry.
        """

    def __contains__(path):
        """Whether a path exists in the manifest."""

    def __delitem__(path):
        """Remove a path from the manifest.

        Raises ``KeyError`` if the path is not in the manifest.
        """

    def __iter__():
        """Iterate over paths in the manifest."""

    def iterkeys():
        """Iterate over paths in the manifest."""

    def keys():
        """Obtain a list of paths in the manifest."""

    def filesnotin(other, match=None):
        """Obtain the set of paths in this manifest but not in another.

        ``match`` is an optional matcher function to be applied to both
        manifests.

        Returns a set of paths.
        """

    def dirs():
        """Returns an object implementing the ``idirs`` interface."""

    def hasdir(dir):
        """Returns a bool indicating if a directory is in this manifest."""

    def walk(match):
        """Generator of paths in manifest satisfying a matcher.

        If the matcher has explicit files listed and they don't exist in
        the manifest, ``match.bad()`` is called for each missing file.
        """

    def diff(other, match=None, clean=False):
        """Find differences between this manifest and another.

        This manifest is compared to ``other``.

        If ``match`` is provided, the two manifests are filtered against this
        matcher and only entries satisfying the matcher are compared.

        If ``clean`` is True, unchanged files are included in the returned
        object.

        Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
        the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
        represents the node and flags for this manifest and ``(node2, flag2)``
        are the same for the other manifest.
        """
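
    # Illustrative sketch (not part of the interface) of consuming the
    # result; ``m1`` and ``m2`` are hypothetical manifests:
    #
    #   for path, ((n1, fl1), (n2, fl2)) in m1.diff(m2).items():
    #       ...   # n1/n2 differ unless clean=True included the path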

    def setflag(path, flag):
        """Set the flag value for a given path.

        Raises ``KeyError`` if the path is not already in the manifest.
        """

    def get(path, default=None):
        """Obtain the node value for a path or a default value if missing."""

    def flags(path):
        """Return the flags value for a path (default: empty bytestring)."""

    def copy():
        """Return a copy of this manifest."""

    def items():
        """Returns an iterable of (path, node) for items in this manifest."""

    def iteritems():
        """Identical to items()."""

    def iterentries():
        """Returns an iterable of (path, node, flags) for this manifest.

        Similar to ``iteritems()`` except items are a 3-tuple and include
        flags.
        """

    def text():
        """Obtain the raw data representation for this manifest.

        Result is used to create a manifest revision.
        """

    def fastdelta(base, changes):
        """Obtain a delta between this manifest and another given changes.

        ``base`` is the raw data representation of another manifest.

        ``changes`` is an iterable of ``(path, to_delete)``.

        Returns a 2-tuple containing ``bytearray(self.text())`` and the
        delta between ``base`` and this manifest.

        If this manifest implementation can't support ``fastdelta()``,
        raise ``mercurial.manifest.FastdeltaUnavailable``.
        """
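
    # Illustrative sketch (not part of the interface); ``m``, ``base`` and
    # ``changes`` are hypothetical:
    #
    #   try:
    #       text, delta = m.fastdelta(base, changes)
    #   except manifest.FastdeltaUnavailable:
    #       text = m.text()   # fall back to the full text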


class imanifestrevisionbase(interfaceutil.Interface):
    """Base interface representing a single revision of a manifest.

    Should not be used as a primary interface: should always be inherited
    as part of a larger interface.
    """

    def copy():
        """Obtain a copy of this manifest instance.

        Returns an object conforming to the ``imanifestrevisionwritable``
        interface. The instance will be associated with the same
        ``imanifestlog`` collection as this instance.
        """

    def read():
        """Obtain the parsed manifest data structure.

        The returned object conforms to the ``imanifestdict`` interface.
        """


class imanifestrevisionstored(imanifestrevisionbase):
    """Interface representing a manifest revision committed to storage."""

    def node():
        """The binary node for this manifest."""

    parents = interfaceutil.Attribute(
        """List of binary nodes that are parents for this manifest revision."""
    )

    def readdelta(shallow=False):
        """Obtain the manifest data structure representing changes from parent.

        This manifest is compared to its 1st parent. A new manifest
        representing those differences is constructed.

        The returned object conforms to the ``imanifestdict`` interface.
        """

    def readfast(shallow=False):
        """Calls either ``read()`` or ``readdelta()``.

        The faster of the two options is called.
        """

    def find(key):
        """Calls ``self.read().find(key)``.

        Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
        """


class imanifestrevisionwritable(imanifestrevisionbase):
    """Interface representing a manifest revision that can be committed."""

    def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
        """Add this revision to storage.

        Takes a transaction object, the changeset revision number it will
        be associated with, its parent nodes, and lists of added and
        removed paths.

        If match is provided, storage can choose not to inspect or write out
        items that do not match. Storage is still required to be able to
        provide the full manifest in the future for any directories written
        (these manifests should not be "narrowed on disk").

        Returns the binary node of the created revision.
        """


class imanifeststorage(interfaceutil.Interface):
    """Storage interface for manifest data."""

    nodeconstants = interfaceutil.Attribute(
        """nodeconstants used by the current repository."""
    )

    tree = interfaceutil.Attribute(
        """The path to the directory this manifest tracks.

        The empty bytestring represents the root manifest.
        """
    )

    index = interfaceutil.Attribute(
        """An ``ifilerevisionssequence`` instance."""
    )

    opener = interfaceutil.Attribute(
        """VFS opener to use to access underlying files used for storage.

        TODO this is revlog specific and should not be exposed.
        """
    )

    _generaldelta = interfaceutil.Attribute(
        """Whether generaldelta storage is being used.

        TODO this is revlog specific and should not be exposed.
        """
    )

    fulltextcache = interfaceutil.Attribute(
        """Dict with cache of fulltexts.

        TODO this doesn't feel appropriate for the storage interface.
        """
    )

    def __len__():
        """Obtain the number of revisions stored for this manifest."""

    def __iter__():
        """Iterate over revision numbers for this manifest."""

    def rev(node):
        """Obtain the revision number given a binary node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def node(rev):
        """Obtain the node value given a revision number.

        Raises ``error.LookupError`` if the revision is not known.
        """

    def lookup(value):
        """Attempt to resolve a value to a node.

        Value can be a binary node, hex node, revision number, or a bytes
        that can be converted to an integer.

        Raises ``error.LookupError`` if a node could not be resolved.
        """

    def parents(node):
        """Returns a 2-tuple of parent nodes for a node.

        Values will be ``nullid`` if the parent is empty.
        """

    def parentrevs(rev):
        """Like parents() but operates on revision numbers."""

    def linkrev(rev):
        """Obtain the changeset revision number a revision is linked to."""

    def revision(node):
        """Obtain fulltext data for a node."""

    def rawdata(node):
1289 """Obtain raw data for a node."""
1289 """Obtain raw data for a node."""
1290
1290
1291 def revdiff(rev1, rev2):
1291 def revdiff(rev1, rev2):
1292 """Obtain a delta between two revision numbers.
1292 """Obtain a delta between two revision numbers.
1293
1293
1294 The returned data is the result of ``bdiff.bdiff()`` on the raw
1294 The returned data is the result of ``bdiff.bdiff()`` on the raw
1295 revision data.
1295 revision data.
1296 """
1296 """
1297
1297
1298 def cmp(node, fulltext):
1298 def cmp(node, fulltext):
1299 """Compare fulltext to another revision.
1299 """Compare fulltext to another revision.
1300
1300
1301 Returns True if the fulltext is different from what is stored.
1301 Returns True if the fulltext is different from what is stored.
1302 """
1302 """
1303
1303
1304 def emitrevisions(
1304 def emitrevisions(
1305 nodes,
1305 nodes,
1306 nodesorder=None,
1306 nodesorder=None,
1307 revisiondata=False,
1307 revisiondata=False,
1308 assumehaveparentrevisions=False,
1308 assumehaveparentrevisions=False,
1309 ):
1309 ):
1310 """Produce ``irevisiondelta`` describing revisions.
1310 """Produce ``irevisiondelta`` describing revisions.
1311
1311
1312 See the documentation for ``ifiledata`` for more.
1312 See the documentation for ``ifiledata`` for more.
1313 """
1313 """
1314
1314
1315 def addgroup(
1315 def addgroup(
1316 deltas,
1316 deltas,
1317 linkmapper,
1317 linkmapper,
1318 transaction,
1318 transaction,
1319 addrevisioncb=None,
1319 addrevisioncb=None,
1320 duplicaterevisioncb=None,
1320 duplicaterevisioncb=None,
1321 ):
1321 ):
1322 """Process a series of deltas for storage.
1322 """Process a series of deltas for storage.
1323
1323
1324 See the documentation in ``ifilemutation`` for more.
1324 See the documentation in ``ifilemutation`` for more.
1325 """
1325 """
1326
1326
1327 def rawsize(rev):
1327 def rawsize(rev):
1328 """Obtain the size of tracked data.
1328 """Obtain the size of tracked data.
1329
1329
1330 Is equivalent to ``len(m.rawdata(node))``.
1330 Is equivalent to ``len(m.rawdata(node))``.
1331
1331
1332 TODO this method is only used by upgrade code and may be removed.
1332 TODO this method is only used by upgrade code and may be removed.
1333 """
1333 """
1334
1334
1335 def getstrippoint(minlink):
1335 def getstrippoint(minlink):
1336 """Find minimum revision that must be stripped to strip a linkrev.
1336 """Find minimum revision that must be stripped to strip a linkrev.
1337
1337
1338 See the documentation in ``ifilemutation`` for more.
1338 See the documentation in ``ifilemutation`` for more.
1339 """
1339 """
1340
1340
1341 def strip(minlink, transaction):
1341 def strip(minlink, transaction):
1342 """Remove storage of items starting at a linkrev.
1342 """Remove storage of items starting at a linkrev.
1343
1343
1344 See the documentation in ``ifilemutation`` for more.
1344 See the documentation in ``ifilemutation`` for more.
1345 """
1345 """
1346
1346
1347 def checksize():
1347 def checksize():
1348 """Obtain the expected sizes of backing files.
1348 """Obtain the expected sizes of backing files.
1349
1349
1350 TODO this is used by verify and it should not be part of the interface.
1350 TODO this is used by verify and it should not be part of the interface.
1351 """
1351 """
1352
1352
1353 def files():
1353 def files():
1354 """Obtain paths that are backing storage for this manifest.
1354 """Obtain paths that are backing storage for this manifest.
1355
1355
1356 TODO this is used by verify and there should probably be a better API
1356 TODO this is used by verify and there should probably be a better API
1357 for this functionality.
1357 for this functionality.
1358 """
1358 """
1359
1359
1360 def deltaparent(rev):
1360 def deltaparent(rev):
1361 """Obtain the revision that a revision is delta'd against.
1361 """Obtain the revision that a revision is delta'd against.
1362
1362
1363 TODO delta encoding is an implementation detail of storage and should
1363 TODO delta encoding is an implementation detail of storage and should
1364 not be exposed to the storage interface.
1364 not be exposed to the storage interface.
1365 """
1365 """
1366
1366
1367 def clone(tr, dest, **kwargs):
1367 def clone(tr, dest, **kwargs):
1368 """Clone this instance to another."""
1368 """Clone this instance to another."""
1369
1369
1370 def clearcaches(clear_persisted_data=False):
1370 def clearcaches(clear_persisted_data=False):
1371 """Clear any caches associated with this instance."""
1371 """Clear any caches associated with this instance."""
1372
1372
1373 def dirlog(d):
1373 def dirlog(d):
1374 """Obtain a manifest storage instance for a tree."""
1374 """Obtain a manifest storage instance for a tree."""
1375
1375
1376 def add(
1376 def add(
1377 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1377 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1378 ):
1378 ):
1379 """Add a revision to storage.
1379 """Add a revision to storage.
1380
1380
1381 ``m`` is an object conforming to ``imanifestdict``.
1381 ``m`` is an object conforming to ``imanifestdict``.
1382
1382
1383 ``link`` is the linkrev revision number.
1383 ``link`` is the linkrev revision number.
1384
1384
1385 ``p1`` and ``p2`` are the parent revision numbers.
1385 ``p1`` and ``p2`` are the parent revision numbers.
1386
1386
1387 ``added`` and ``removed`` are iterables of added and removed paths,
1387 ``added`` and ``removed`` are iterables of added and removed paths,
1388 respectively.
1388 respectively.
1389
1389
1390 ``readtree`` is a function that can be used to read the child tree(s)
1390 ``readtree`` is a function that can be used to read the child tree(s)
1391 when recursively writing the full tree structure when using
1391 when recursively writing the full tree structure when using
1392 treemanifets.
1392 treemanifets.
1393
1393
1394 ``match`` is a matcher that can be used to hint to storage that not all
1394 ``match`` is a matcher that can be used to hint to storage that not all
1395 paths must be inspected; this is an optimization and can be safely
1395 paths must be inspected; this is an optimization and can be safely
1396 ignored. Note that the storage must still be able to reproduce a full
1396 ignored. Note that the storage must still be able to reproduce a full
1397 manifest including files that did not match.
1397 manifest including files that did not match.
1398 """
1398 """
1399
1399
1400 def storageinfo(
1400 def storageinfo(
1401 exclusivefiles=False,
1401 exclusivefiles=False,
1402 sharedfiles=False,
1402 sharedfiles=False,
1403 revisionscount=False,
1403 revisionscount=False,
1404 trackedsize=False,
1404 trackedsize=False,
1405 storedsize=False,
1405 storedsize=False,
1406 ):
1406 ):
1407 """Obtain information about storage for this manifest's data.
1407 """Obtain information about storage for this manifest's data.
1408
1408
1409 See ``ifilestorage.storageinfo()`` for a description of this method.
1409 See ``ifilestorage.storageinfo()`` for a description of this method.
1410 This one behaves the same way, except for manifest data.
1410 This one behaves the same way, except for manifest data.
1411 """
1411 """
1412
1412
1413 def get_revlog():
1413 def get_revlog():
1414 """return an actual revlog instance if any
1414 """return an actual revlog instance if any
1415
1415
1416 This exist because a lot of code leverage the fact the underlying
1416 This exist because a lot of code leverage the fact the underlying
1417 storage is a revlog for optimization, so giving simple way to access
1417 storage is a revlog for optimization, so giving simple way to access
1418 the revlog instance helps such code.
1418 the revlog instance helps such code.
1419 """
1419 """
1420
1420
1421
1421
1422 class imanifestlog(interfaceutil.Interface):
1422 class imanifestlog(interfaceutil.Interface):
1423 """Interface representing a collection of manifest snapshots.
1423 """Interface representing a collection of manifest snapshots.
1424
1424
1425 Represents the root manifest in a repository.
1425 Represents the root manifest in a repository.
1426
1426
1427 Also serves as a means to access nested tree manifests and to cache
1427 Also serves as a means to access nested tree manifests and to cache
1428 tree manifests.
1428 tree manifests.
1429 """
1429 """
1430
1430
1431 nodeconstants = interfaceutil.Attribute(
1431 nodeconstants = interfaceutil.Attribute(
1432 """nodeconstants used by the current repository."""
1432 """nodeconstants used by the current repository."""
1433 )
1433 )
1434
1434
1435 def __getitem__(node):
1435 def __getitem__(node):
1436 """Obtain a manifest instance for a given binary node.
1436 """Obtain a manifest instance for a given binary node.
1437
1437
1438 Equivalent to calling ``self.get('', node)``.
1438 Equivalent to calling ``self.get('', node)``.
1439
1439
1440 The returned object conforms to the ``imanifestrevisionstored``
1440 The returned object conforms to the ``imanifestrevisionstored``
1441 interface.
1441 interface.
1442 """
1442 """
1443
1443
1444 def get(tree, node, verify=True):
1444 def get(tree, node, verify=True):
1445 """Retrieve the manifest instance for a given directory and binary node.
1445 """Retrieve the manifest instance for a given directory and binary node.
1446
1446
1447 ``node`` always refers to the node of the root manifest (which will be
1447 ``node`` always refers to the node of the root manifest (which will be
1448 the only manifest if flat manifests are being used).
1448 the only manifest if flat manifests are being used).
1449
1449
1450 If ``tree`` is the empty string, the root manifest is returned.
1450 If ``tree`` is the empty string, the root manifest is returned.
1451 Otherwise the manifest for the specified directory will be returned
1451 Otherwise the manifest for the specified directory will be returned
1452 (requires tree manifests).
1452 (requires tree manifests).
1453
1453
1454 If ``verify`` is True, ``LookupError`` is raised if the node is not
1454 If ``verify`` is True, ``LookupError`` is raised if the node is not
1455 known.
1455 known.
1456
1456
1457 The returned object conforms to the ``imanifestrevisionstored``
1457 The returned object conforms to the ``imanifestrevisionstored``
1458 interface.
1458 interface.
1459 """
1459 """
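
    # Illustrative sketch (not part of the interface): ``ml`` is a
    # hypothetical ``imanifestlog``; ``node`` always names a root manifest
    # revision.
    #
    #   rootmf = ml[node]               # same as ml.get(b'', node)
    #   submf = ml.get(b'dir/', node)   # requires tree manifests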

    def getstorage(tree):
        """Retrieve an interface to storage for a particular tree.

        If ``tree`` is the empty bytestring, storage for the root manifest will
        be returned. Otherwise storage for a tree manifest is returned.

        TODO formalize interface for returned object.
        """

    def clearcaches():
        """Clear caches associated with this collection."""

    def rev(node):
        """Obtain the revision number for a binary node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def update_caches(transaction):
        """Update whatever caches are relevant for the used storage."""


class ilocalrepositoryfilestorage(interfaceutil.Interface):
    """Local repository sub-interface providing access to tracked file storage.

    This interface defines how a repository accesses storage for a single
    tracked file path.
    """

    def file(f):
        """Obtain a filelog for a tracked path.

        The returned type conforms to the ``ifilestorage`` interface.
        """
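
    # Illustrative sketch (not part of the interface): ``repo`` is a
    # hypothetical local repository.
    #
    #   fl = repo.file(b'path/to/file')   # an ``ifilestorage``
    #   len(fl)                           # number of stored revisions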
1495
1495
1496
1496
1497 class ilocalrepositorymain(interfaceutil.Interface):
1497 class ilocalrepositorymain(interfaceutil.Interface):
1498 """Main interface for local repositories.
1498 """Main interface for local repositories.
1499
1499
1500 This currently captures the reality of things - not how things should be.
1500 This currently captures the reality of things - not how things should be.
1501 """
1501 """
1502
1502
1503 nodeconstants = interfaceutil.Attribute(
1503 nodeconstants = interfaceutil.Attribute(
1504 """Constant nodes matching the hash function used by the repository."""
1504 """Constant nodes matching the hash function used by the repository."""
1505 )
1505 )
1506 nullid = interfaceutil.Attribute(
1506 nullid = interfaceutil.Attribute(
1507 """null revision for the hash function used by the repository."""
1507 """null revision for the hash function used by the repository."""
1508 )
1508 )
1509
1509
1510 supported = interfaceutil.Attribute(
1510 supported = interfaceutil.Attribute(
1511 """Set of requirements that this repo is capable of opening."""
1511 """Set of requirements that this repo is capable of opening."""
1512 )
1512 )
1513
1513
1514 requirements = interfaceutil.Attribute(
1514 requirements = interfaceutil.Attribute(
1515 """Set of requirements this repo uses."""
1515 """Set of requirements this repo uses."""
1516 )
1516 )
1517
1517
1518 features = interfaceutil.Attribute(
1518 features = interfaceutil.Attribute(
1519 """Set of "features" this repository supports.
1519 """Set of "features" this repository supports.
1520
1520
1521 A "feature" is a loosely-defined term. It can refer to a feature
1521 A "feature" is a loosely-defined term. It can refer to a feature
1522 in the classical sense or can describe an implementation detail
1522 in the classical sense or can describe an implementation detail
1523 of the repository. For example, a ``readonly`` feature may denote
1523 of the repository. For example, a ``readonly`` feature may denote
1524 the repository as read-only. Or a ``revlogfilestore`` feature may
1524 the repository as read-only. Or a ``revlogfilestore`` feature may
1525 denote that the repository is using revlogs for file storage.
1525 denote that the repository is using revlogs for file storage.
1526
1526
1527 The intent of features is to provide a machine-queryable mechanism
1527 The intent of features is to provide a machine-queryable mechanism
1528 for repo consumers to test for various repository characteristics.
1528 for repo consumers to test for various repository characteristics.
1529
1529
1530 Features are similar to ``requirements``. The main difference is that
1530 Features are similar to ``requirements``. The main difference is that
1531 requirements are stored on-disk and represent requirements to open the
1531 requirements are stored on-disk and represent requirements to open the
1532 repository. Features are more run-time capabilities of the repository
1532 repository. Features are more run-time capabilities of the repository
1533 and more granular capabilities (which may be derived from requirements).
1533 and more granular capabilities (which may be derived from requirements).
1534 """
1534 """
1535 )
1535 )

    filtername = interfaceutil.Attribute(
        """Name of the repoview that is active on this repo."""
    )

    vfs_map = interfaceutil.Attribute(
        """a bytes-key → vfs mapping used by transaction and others"""
    )

    wvfs = interfaceutil.Attribute(
        """VFS used to access the working directory."""
    )

    vfs = interfaceutil.Attribute(
        """VFS rooted at the .hg directory.

        Used to access repository data not in the store.
        """
    )

    svfs = interfaceutil.Attribute(
        """VFS rooted at the store.

        Used to access repository data in the store. Typically .hg/store.
        But can point elsewhere if the store is shared.
        """
    )

    root = interfaceutil.Attribute(
        """Path to the root of the working directory."""
    )

    path = interfaceutil.Attribute("""Path to the .hg directory.""")

    origroot = interfaceutil.Attribute(
        """The filesystem path that was used to construct the repo."""
    )

    auditor = interfaceutil.Attribute(
        """A pathauditor for the working directory.

        This checks if a path refers to a nested repository.

        Operates on the filesystem.
        """
    )

    nofsauditor = interfaceutil.Attribute(
        """A pathauditor for the working directory.

        This is like ``auditor`` except it doesn't do filesystem checks.
        """
    )

    baseui = interfaceutil.Attribute(
        """Original ui instance passed into constructor."""
    )

    ui = interfaceutil.Attribute("""Main ui instance for this instance.""")

    sharedpath = interfaceutil.Attribute(
        """Path to the .hg directory of the repo this repo was shared from."""
    )

    store = interfaceutil.Attribute("""A store instance.""")

    spath = interfaceutil.Attribute("""Path to the store.""")

    sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")

    cachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory.

        Typically .hg/cache.
        """
    )

    wcachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory dedicated to the working
        copy.

        Typically .hg/wcache.
        """
    )

    filteredrevcache = interfaceutil.Attribute(
        """Holds sets of revisions to be filtered."""
    )

    names = interfaceutil.Attribute("""A ``namespaces`` instance.""")

    filecopiesmode = interfaceutil.Attribute(
        """The way file copies should be dealt with in this repo."""
    )

    def close():
        """Close the handle on this repository."""

    def peer(path=None):
        """Obtain an object conforming to the ``peer`` interface."""

    def unfiltered():
        """Obtain an unfiltered/raw view of this repo."""

    def filtered(name, visibilityexceptions=None):
        """Obtain a named view of this repository."""

    obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")

    changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")

    manifestlog = interfaceutil.Attribute(
        """An instance conforming to the ``imanifestlog`` interface.

        Provides access to manifests for the repository.
        """
    )

    dirstate = interfaceutil.Attribute("""Working directory state.""")

    narrowpats = interfaceutil.Attribute(
        """Matcher patterns for this repository's narrowspec."""
    )

    def narrowmatch(match=None, includeexact=False):
        """Obtain a matcher for the narrowspec."""

    def setnarrowpats(newincludes, newexcludes):
        """Define the narrowspec for this repository."""

    def __getitem__(changeid):
        """Try to resolve a changectx."""

    def __contains__(changeid):
        """Whether a changeset exists."""

    def __nonzero__():
        """Always returns True."""
        return True

    __bool__ = __nonzero__

    def __len__():
        """Returns the number of changesets in the repo."""

    def __iter__():
        """Iterate over revisions in the changelog."""

    def revs(expr, *args):
        """Evaluate a revset.

        Emits revisions.
        """

    def set(expr, *args):
        """Evaluate a revset.

        Emits changectx instances.
        """

    def anyrevs(specs, user=False, localalias=None):
        """Find revisions matching one of the given revsets."""

    def url():
        """Returns a string representing the location of this repo."""

    def hook(name, throw=False, **args):
        """Call a hook."""

    def tags():
        """Return a mapping of tag to node."""

    def tagtype(tagname):
        """Return the type of a given tag."""

    def tagslist():
        """Return a list of tags ordered by revision."""

    def nodetags(node):
        """Return the tags associated with a node."""

    def nodebookmarks(node):
        """Return the list of bookmarks pointing to the specified node."""

    def branchmap():
        """Return a mapping of branch to heads in that branch."""

    def revbranchcache():
        pass

    def register_changeset(rev, changelogrevision):
        """Extension point for caches for new nodes.

        Multiple consumers are expected to need parts of the
        changelogrevision, so it is provided as an optimization to avoid
        duplicate lookups. A simple cache would be fragile when other
        revisions are accessed, too."""
        pass

    def branchtip(branchtip, ignoremissing=False):
        """Return the tip node for a given branch."""

    def lookup(key):
        """Resolve the node for a revision."""

    def lookupbranch(key):
        """Look up the branch name of the given revision or branch name."""

    def known(nodes):
        """Determine whether a series of nodes is known.

        Returns a list of bools.
        """

    def local():
        """Whether the repository is local."""
        return True

    def publishing():
        """Whether the repository is a publishing repository."""

    def cancopy():
        pass

    def shared():
        """The type of shared repository or None."""

    def wjoin(f, *insidef):
        """Calls self.vfs.reljoin(self.root, f, *insidef)"""

    def setparents(p1, p2):
        """Set the parent nodes of the working directory."""

    def filectx(path, changeid=None, fileid=None):
        """Obtain a filectx for the given file revision."""

    def getcwd():
        """Obtain the current working directory from the dirstate."""

    def pathto(f, cwd=None):
        """Obtain the relative path to a file."""

    def adddatafilter(name, fltr):
        pass

    def wread(filename):
        """Read a file from wvfs, using data filters."""

    def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
        """Write data to a file in the wvfs, using data filters."""

    def wwritedata(filename, data):
        """Resolve data for writing to the wvfs, using data filters."""

    def currenttransaction():
        """Obtain the current transaction instance or None."""

    def transaction(desc, report=None):
        """Open a new transaction to write to the repository."""

    def undofiles():
        """Returns a list of (vfs, path) for files to undo transactions."""

    def recover():
        """Roll back an interrupted transaction."""

    def rollback(dryrun=False, force=False):
        """Undo the last transaction.

        DANGEROUS.
        """

    def updatecaches(tr=None, full=False, caches=None):
        """Warm repo caches."""

    def invalidatecaches():
        """Invalidate cached data due to the repository mutating."""

    def invalidatevolatilesets():
        pass

    def invalidatedirstate():
        """Invalidate the dirstate."""

    def invalidate(clearfilecache=False):
        pass

    def invalidateall():
        pass

    def lock(wait=True):
        """Lock the repository store and return a lock instance."""

    def currentlock():
        """Return the lock if it's held or None."""

    def wlock(wait=True):
        """Lock the non-store parts of the repository."""

    def currentwlock():
        """Return the wlock if it's held or None."""

    def checkcommitpatterns(wctx, match, status, fail):
        pass

    def commit(
        text=b'',
        user=None,
        date=None,
        match=None,
        force=False,
        editor=False,
        extra=None,
    ):
        """Add a new revision to the repository."""

    def commitctx(ctx, error=False, origctx=None):
        """Commit a commitctx instance to the repository."""

    def destroying():
        """Inform the repository that nodes are about to be destroyed."""

    def destroyed():
        """Inform the repository that nodes have been destroyed."""

    def status(
        node1=b'.',
        node2=None,
        match=None,
        ignored=False,
        clean=False,
        unknown=False,
        listsubrepos=False,
    ):
        """Convenience method to call repo[x].status()."""

    def addpostdsstatus(ps):
        pass

    def postdsstatus():
        pass

    def clearpostdsstatus():
        pass

    def heads(start=None):
        """Obtain list of nodes that are DAG heads."""

    def branchheads(branch=None, start=None, closed=False):
        pass

    def branches(nodes):
        pass

    def between(pairs):
        pass

    def checkpush(pushop):
        pass

    prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")

    def pushkey(namespace, key, old, new):
        pass

    def listkeys(namespace):
        pass

    def debugwireargs(one, two, three=None, four=None, five=None):
        pass

    def savecommitmessage(text):
        pass

    def register_sidedata_computer(
        kind, category, keys, computer, flags, replace=False
    ):
        pass

    def register_wanted_sidedata(category):
        pass


class completelocalrepository(
    ilocalrepositorymain, ilocalrepositoryfilestorage
):
    """Complete interface for a local repository."""


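# Illustrative sketch (not part of the interface definitions): one way a
# consumer might exercise ``completelocalrepository``. ``hg.repository`` and
# ``uimod.ui.load`` are the usual entry points for obtaining a repo; the rest
# relies only on attributes and methods declared above.
#
#     from mercurial import hg, ui as uimod
#
#     ui = uimod.ui.load()
#     repo = hg.repository(ui, b'/path/to/repo')
#     with repo.lock():
#         tr = repo.transaction(b'example')
#         try:
#             repo.updatecaches(tr)
#             tr.close()
#         finally:
#             tr.release()
#     for rev in repo.revs(b'heads(all())'):
#         ctx = repo[rev]  # a changectx, via __getitem__

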
class iwireprotocolcommandcacher(interfaceutil.Interface):
    """Represents a caching backend for wire protocol commands.

    Wire protocol version 2 supports transparent caching of many commands.
    To leverage this caching, servers can activate objects that cache
    command responses. Objects handle both cache writing and reading.
    This interface defines how that response caching mechanism works.

    Wire protocol version 2 commands emit a series of objects that are
    serialized and sent to the client. The caching layer exists between
    the invocation of the command function and the sending of its output
    objects to an output layer.

    Instances of this interface represent a binding to a cache that
    can serve a response (in place of calling a command function) and/or
    write responses to a cache for subsequent use.

    When a command request arrives, the following happens with regards
    to this interface:

    1. The server determines whether the command request is cacheable.
    2. If it is, an instance of this interface is spawned.
    3. The cacher is activated in a context manager (``__enter__`` is called).
    4. A cache *key* for that request is derived. This will call the
       instance's ``adjustcachekeystate()`` method so the derivation
       can be influenced.
    5. The cacher is informed of the derived cache key via a call to
       ``setcachekey()``.
    6. The cacher's ``lookup()`` method is called to test for presence of
       the derived key in the cache.
    7. If ``lookup()`` returns a hit, that cached result is used in place
       of invoking the command function. ``__exit__`` is called and the
       instance is discarded.
    8. The command function is invoked.
    9. ``onobject()`` is called for each object emitted by the command
       function.
    10. After the final object is seen, ``onfinished()`` is called.
    11. ``__exit__`` is called to signal the end of use of the instance.

    Cache *key* derivation can be influenced by the instance.

    Cache keys are initially derived by a deterministic representation of
    the command request. This includes the command name, arguments, protocol
    version, etc. This initial key derivation is performed by CBOR-encoding a
    data structure and feeding that output into a hasher.

    Instances of this interface can influence this initial key derivation
    via ``adjustcachekeystate()``.

    The instance is informed of the derived cache key via a call to
    ``setcachekey()``. The instance must store the key locally so it can
    be consulted on subsequent operations that may require it.

    When constructed, the instance has access to a callable that can be used
    for encoding response objects. This callable receives as its single
    argument an object emitted by a command function. It returns an iterable
    of bytes chunks representing the encoded object. Unless the cacher is
    caching native Python objects in memory or has a way of reconstructing
    the original Python objects, implementations typically call this function
    to produce bytes from the output objects and then store those bytes in
    the cache. When it comes time to re-emit those bytes, they are wrapped
    in a ``wireprototypes.encodedresponse`` instance to tell the output
    layer that they are pre-encoded.

    When receiving the objects emitted by the command function, instances
    can choose what to do with those objects. The simplest thing to do is
    re-emit the original objects. They will be forwarded to the output
    layer and will be processed as if the cacher did not exist.

    Implementations could also choose to not emit objects - instead locally
    buffering objects or their encoded representation. They could then emit
    a single "coalesced" object when ``onfinished()`` is called. In
    this way, the implementation would function as a filtering layer of
    sorts.

    When caching objects, typically the encoded form of the object will
    be stored. Keep in mind that if the original object is forwarded to
    the output layer, it will need to be encoded there as well. For large
    output, this redundant encoding could add overhead. Implementations
    could wrap the encoded object data in ``wireprototypes.encodedresponse``
    instances to avoid this overhead.
    """

    def __enter__():
        """Marks the instance as active.

        Should return self.
        """

    def __exit__(exctype, excvalue, exctb):
        """Called when the cacher is no longer used.

        This can be used by implementations to perform cleanup actions (e.g.
        disconnecting network sockets, aborting a partially cached response).
        """

    def adjustcachekeystate(state):
        """Influences cache key derivation by adjusting state to derive key.

        A dict defining the state used to derive the cache key is passed.

        Implementations can modify this dict to record additional state that
        is wanted to influence key derivation.

        Implementations are *highly* encouraged to not modify or delete
        existing keys.
        """

    def setcachekey(key):
        """Record the derived cache key for this request.

        Instances may mutate the key for internal usage, as desired. e.g.
        instances may wish to prepend the repo name, introduce path
        components for filesystem or URL addressing, etc. Behavior is up to
        the cache.

        Returns a bool indicating if the request is cacheable by this
        instance.
        """

    def lookup():
        """Attempt to resolve an entry in the cache.

        The instance is instructed to look for the cache key that it was
        informed about via the call to ``setcachekey()``.

        If there's no cache hit or the cacher doesn't wish to use the cached
        entry, ``None`` should be returned.

        Else, a dict defining the cached result should be returned. The
        dict may have the following keys:

        objs
           An iterable of objects that should be sent to the client. That
           iterable of objects is expected to be what the command function
           would return if invoked or an equivalent representation thereof.
        """

    def onobject(obj):
        """Called when a new object is emitted from the command function.

        Receives as its argument the object that was emitted from the
        command function.

        This method returns an iterator of objects to forward to the output
        layer. The easiest implementation is a generator that just
        ``yield obj``.
        """

    def onfinished():
        """Called after all objects have been emitted from the command function.

        Implementations should return an iterator of objects to forward to
        the output layer.

        This method can be a generator.
        """
@@ -1,2382 +1,2382 b''
# manifest.py - manifest revision class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import heapq
import itertools
import struct
import weakref

from .i18n import _
from .node import (
    bin,
    hex,
    nullrev,
)
from . import (
    encoding,
    error,
    match as matchmod,
    mdiff,
    pathutil,
    policy,
    pycompat,
    revlog,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    constants as revlog_constants,
)

parsers = policy.importmod('parsers')
propertycache = util.propertycache

# Allow tests to more easily test the alternate path in manifestdict.fastdelta()
FASTDELTA_TEXTDIFF_THRESHOLD = 1000


def _parse(nodelen, data):
    # This method does a little bit of excessive-looking
    # precondition checking. This is so that the behavior of this
    # class exactly matches its C counterpart to try and help
    # prevent surprise breakage for anyone that develops against
    # the pure version.
    if data and data[-1:] != b'\n':
        raise ValueError(b'Manifest did not end in a newline.')
    prev = None
    for l in data.splitlines():
        if prev is not None and prev > l:
            raise ValueError(b'Manifest lines not in sorted order.')
        prev = l
        f, n = l.split(b'\0')
        nl = len(n)
        flags = n[-1:]
        if flags in _manifestflags:
            n = n[:-1]
            nl -= 1
        else:
            flags = b''
        if nl != 2 * nodelen:
            raise ValueError(b'Invalid manifest line')

        yield f, bin(n), flags


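# Illustrative note (not in the original source): manifest text, as consumed
# by _parse() above and produced by _text() below, is a sorted series of
# lines of the form ``<filename>\0<hex node>[<flag>]\n``, where the optional
# flag is ``l`` (symlink), ``x`` (executable) or ``t`` (tree). For example,
# with nodelen=20 (sha1):
#
#     data = b'bar\x00' + b'11' * 20 + b'x\n' + b'foo\x00' + b'aa' * 20 + b'\n'
#     list(_parse(20, data))
#     # => [(b'bar', b'\x11' * 20, b'x'), (b'foo', b'\xaa' * 20, b'')]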
def _text(it):
    files = []
    lines = []
    for f, n, fl in it:
        files.append(f)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))

    _checkforbidden(files)
    return b''.join(lines)


class lazymanifestiter:
    def __init__(self, lm):
        self.pos = 0
        self.lm = lm

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            self.pos += 1
            return data[0]
        self.pos += 1
        zeropos = data.find(b'\x00', pos)
        return data[pos:zeropos]

    __next__ = next


class lazymanifestiterentries:
    def __init__(self, lm):
        self.lm = lm
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            self.pos += 1
            return data
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', pos)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen = nlpos - zeropos - 2
        else:
            hlen = nlpos - zeropos - 1
            flags = b''
        if hlen != 2 * self.lm._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(
            data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
        )
        self.pos += 1
        return (data[pos:zeropos], hashval, flags)

    __next__ = next


def unhexlify(data, extra, pos, length):
    s = bin(data[pos : pos + length])
    if extra:
        # append the spilled-over byte; bytechr keeps this bytes, not str
        s += pycompat.bytechr(extra & 0xFF)
    return s


def _cmp(a, b):
    return (a > b) - (a < b)


_manifestflags = {b'', b'l', b't', b'x'}


class _lazymanifest:
    """A pure python manifest backed by a byte string. It is supplemented with
    internal lists as it is modified, until it is compacted back to a pure byte
    string.

    ``data`` is the initial manifest data.

    ``positions`` is a list of offsets, one per manifest entry. Positive
    values are offsets into ``data``, negative values are offsets into the
    ``extradata`` list. When an entry is removed, its entry is dropped from
    ``positions``. The values are encoded such that when walking the list and
    indexing into ``data`` or ``extradata`` as appropriate, the entries are
    sorted by filename.

    ``extradata`` is a list of (key, hash, flags) for entries that were added or
    modified since the manifest was created or compacted.
    """

    def __init__(
        self,
        nodelen,
        data,
        positions=None,
        extrainfo=None,
        extradata=None,
        hasremovals=False,
    ):
        self._nodelen = nodelen
        if positions is None:
            self.positions = self.findlines(data)
            self.extrainfo = [0] * len(self.positions)
            self.data = data
            self.extradata = []
            self.hasremovals = False
        else:
            self.positions = positions[:]
            self.extrainfo = extrainfo[:]
            self.extradata = extradata[:]
            self.data = data
            self.hasremovals = hasremovals

    def findlines(self, data):
        if not data:
            return []
        pos = data.find(b"\n")
        if pos == -1 or data[-1:] != b'\n':
            raise ValueError(b"Manifest did not end in a newline.")
        positions = [0]
        prev = data[: data.find(b'\x00')]
        while pos < len(data) - 1 and pos != -1:
            positions.append(pos + 1)
            nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
            if nexts < prev:
                raise ValueError(b"Manifest lines not in sorted order.")
            prev = nexts
            pos = data.find(b"\n", pos + 1)
        return positions

    def _get(self, index):
        # get the position encoded in pos:
        # positive number is an index in 'data'
        # negative number is in extrapieces
        pos = self.positions[index]
        if pos >= 0:
            return self.data, pos
        return self.extradata[-pos - 1], -1

    def _getkey(self, pos):
        if pos >= 0:
            return self.data[pos : self.data.find(b'\x00', pos + 1)]
        return self.extradata[-pos - 1][0]

    def bsearch(self, key):
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return midpoint
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return -1

    def bsearch2(self, key):
        # same as the above, but will always return the position
        # done for performance reasons
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return (midpoint, True)
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return (first, False)

    def __contains__(self, key):
        return self.bsearch(key) != -1

    def __getitem__(self, key):
        if not isinstance(key, bytes):
            raise TypeError(b"getitem: manifest keys must be a bytes.")
        needle = self.bsearch(key)
        if needle == -1:
            raise KeyError
        data, pos = self._get(needle)
        if pos == -1:
            return (data[1], data[2])
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', zeropos)
        assert 0 <= needle <= len(self.positions)
        assert len(self.extrainfo) == len(self.positions)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        hlen = nlpos - zeropos - 1
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen -= 1
        else:
            flags = b''
        if hlen != 2 * self._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
        return (hashval, flags)

    def __delitem__(self, key):
        needle, found = self.bsearch2(key)
        if not found:
            raise KeyError
        cur = self.positions[needle]
        self.positions = self.positions[:needle] + self.positions[needle + 1 :]
        self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
        if cur >= 0:
            # This does NOT unsort the list as far as the search functions are
            # concerned, as they only examine lines mapped by self.positions.
            self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
            self.hasremovals = True

    def __setitem__(self, key, value):
        if not isinstance(key, bytes):
            raise TypeError(b"setitem: manifest keys must be a byte string.")
        if not isinstance(value, tuple) or len(value) != 2:
            raise TypeError(
                b"Manifest values must be a tuple of (node, flags)."
            )
        hashval = value[0]
        if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
            raise TypeError(b"node must be a 20-byte or 32-byte byte string")
        flags = value[1]
        if not isinstance(flags, bytes) or len(flags) > 1:
            raise TypeError(
                b"flags must be a 0 or 1 byte string, got %s"
                % pycompat.byterepr(flags)
            )
        needle, found = self.bsearch2(key)
        if found:
            # an entry for this key already exists; update it in place
            pos = self.positions[needle]
            if pos < 0:
                self.extradata[-pos - 1] = (key, hashval, value[1])
            else:
                # the old entry lives in the compacted buffer; rather than
                # rewrite the buffer, shadow it with a new extradata entry
                self.extradata.append((key, hashval, value[1]))
                self.positions[needle] = -len(self.extradata)
        else:
            # not found, put it in with extra positions
            self.extradata.append((key, hashval, value[1]))
            self.positions = (
                self.positions[:needle]
                + [-len(self.extradata)]
                + self.positions[needle:]
            )
            self.extrainfo = (
                self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
            )

    def copy(self):
        # XXX call _compact like in C?
        return _lazymanifest(
            self._nodelen,
            self.data,
            self.positions,
            self.extrainfo,
            self.extradata,
            self.hasremovals,
        )

    def _compact(self):
        # hopefully not called TOO often
        if len(self.extradata) == 0 and not self.hasremovals:
            return
        l = []
        i = 0
        offset = 0
        self.extrainfo = [0] * len(self.positions)
        while i < len(self.positions):
            if self.positions[i] >= 0:
                cur = self.positions[i]
                last_cut = cur

                # Collect all contiguous entries in the buffer at the current
                # offset, breaking out only for added/modified items held in
                # extradata, or a deleted line prior to the next position.
                while True:
                    self.positions[i] = offset
                    i += 1
                    if i == len(self.positions) or self.positions[i] < 0:
                        break

                    # A removed file has no positions[] entry, but does have an
                    # overwritten first byte. Break out and find the end of the
                    # current good entry/entries if there is a removed file
                    # before the next position.
                    if (
                        self.hasremovals
                        and self.data.find(b'\n\x00', cur, self.positions[i])
                        != -1
                    ):
                        break

                    offset += self.positions[i] - cur
                    cur = self.positions[i]
                end_cut = self.data.find(b'\n', cur)
                if end_cut != -1:
                    end_cut += 1
                offset += end_cut - cur
                l.append(self.data[last_cut:end_cut])
            else:
                while i < len(self.positions) and self.positions[i] < 0:
                    cur = self.positions[i]
                    t = self.extradata[-cur - 1]
                    l.append(self._pack(t))
                    self.positions[i] = offset
                    # Hashes are either 20 bytes (old sha1s) or 32
                    # bytes (new non-sha1).
                    hlen = 20
                    if len(t[1]) > 25:
                        hlen = 32
                    if len(t[1]) > hlen:
                        self.extrainfo[i] = ord(t[1][hlen + 1])
                    offset += len(l[-1])
                    i += 1
        self.data = b''.join(l)
        self.hasremovals = False
        self.extradata = []

417 def _pack(self, d):
417 def _pack(self, d):
418 n = d[1]
418 n = d[1]
419 assert len(n) in (20, 32)
419 assert len(n) in (20, 32)
420 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
420 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
421
421
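    # Illustrative sketch (not part of the original module): _pack() above
    # serializes one manifest entry as "<path>\0<hex node><flags>\n", where
    # the node is hex-encoded (40 or 64 characters) and flags is at most one
    # byte (b'', b'x', b'l' or b't'). For a hypothetical 20-byte node:
    #
    #   entry = (b'foo/bar.txt', b'\xaa' * 20, b'x')
    #   packed = entry[0] + b'\x00' + b'aa' * 20 + entry[2] + b'\n'
    #   # packed == b'foo/bar.txt\x00aaaa...aax\n' (40 hex characters)
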
    def text(self):
        self._compact()
        return self.data

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.'''
        # XXX think whether efficiency matters here
        diff = {}

        for fn, e1, flags in self.iterentries():
            if fn not in m2:
                diff[fn] = (e1, flags), (None, b'')
            else:
                e2 = m2[fn]
                if (e1, flags) != e2:
                    diff[fn] = (e1, flags), e2
                elif clean:
                    diff[fn] = None

        for fn, e2, flags in m2.iterentries():
            if fn not in self:
                diff[fn] = (None, b''), (e2, flags)

        return diff

    def iterentries(self):
        return lazymanifestiterentries(self)

    def iterkeys(self):
        return lazymanifestiter(self)

    def __iter__(self):
        return lazymanifestiter(self)

    def __len__(self):
        return len(self.positions)

    def filtercopy(self, filterfn):
        # XXX should be optimized
        c = _lazymanifest(self._nodelen, b'')
        for f, n, fl in self.iterentries():
            if filterfn(f):
                c[f] = n, fl
        return c


try:
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    pass


@interfaceutil.implementer(repository.imanifestdict)
class manifestdict:
    def __init__(self, nodelen, data=b''):
        self._nodelen = nodelen
        self._lm = _lazymanifest(nodelen, data)

    def __getitem__(self, key):
        return self._lm[key][0]

    def find(self, key):
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __nonzero__(self):
        # nonzero is covered by the __len__ function, but implementing it here
        # makes it easier for extensions to override.
        return len(self._lm) != 0

    __bool__ = __nonzero__

    def __setitem__(self, key, node):
        self._lm[key] = node, self.flags(key)

    def __contains__(self, key):
        if key is None:
            return False
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return self._lm.__iter__()

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        return list(self.iterkeys())

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match is not None:
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(m2.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in m2}

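    # Illustrative sketch (not part of the original module): filesnotin()
    # is a set difference on file names only; nodeids and flags are not
    # compared. With two small manifests built by hand:
    #
    #   m1 = manifestdict(20)
    #   m1[b'a.txt'] = b'\x11' * 20
    #   m1[b'b.txt'] = b'\x22' * 20
    #   m2 = manifestdict(20)
    #   m2[b'b.txt'] = b'\x33' * 20   # same name, different node
    #   m1.filesnotin(m2)             # -> {b'a.txt'}
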
    @propertycache
    def _dirs(self):
        return pathutil.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        """Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files."""
        files = match.files()
        return len(files) < 100 and (
            match.isexact()
            or (match.prefix() and all(fn in self for fn in files))
        )

    def walk(self, match):
        """Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                if fn in self:
                    yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

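    # Illustrative sketch (not part of the original module): walk() yields
    # matching names and reports missing exact files through match.bad().
    # The matcher construction below is an assumption for the example and
    # elides most of mercurial.match.match's real signature:
    #
    #   m = manifestdict(20)
    #   m[b'src/a.py'] = b'\x11' * 20
    #   matcher = matchmod.match(b'/repo', b'', [b'glob:src/*.py'])
    #   list(m.walk(matcher))   # -> [b'src/a.py']
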
    def _matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            m = manifestdict(self._nodelen)
            lm = self._lm
            for fn in match.files():
                if fn in lm:
                    m._lm[fn] = lm[fn]
            return m

        m = manifestdict(self._nodelen)
        m._lm = self._lm.filtercopy(match)
        return m

    def diff(self, m2, match=None, clean=False):
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match:
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        return self._lm.diff(m2._lm, clean)

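    # Illustrative sketch (not part of the original module) of the return
    # shape documented in diff() above, for one changed and one removed file
    # (n1/n2 standing in for 20-byte nodeids):
    #
    #   {
    #       b'changed.txt': ((n1, b''), (n2, b'')),    # node differs
    #       b'removed.txt': ((n1, b''), (None, b'')),  # only in self
    #   }
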
    def setflag(self, key, flag):
        if flag not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._lm[key] = self[key], flag

    def get(self, key, default=None):
        try:
            return self._lm[key][0]
        except KeyError:
            return default

    def flags(self, key):
        try:
            return self._lm[key][1]
        except KeyError:
            return b''

    def copy(self):
        c = manifestdict(self._nodelen)
        c._lm = self._lm.copy()
        return c

    def items(self):
        return (x[:2] for x in self._lm.iterentries())

    def iteritems(self):
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self):
        return self._lm.iterentries()

    def text(self):
        # most likely uses native version
        return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as a bytearray and a list of changes
        relative to that text, compute a delta that can be used by revlog.
        """
        delta = []
        dstart = None
        dend = None
        dline = [b""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # bs will either be the index of the item or the insert point
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = b"%s\0%s%s\n" % (f, hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                            _(b"failed to remove %s from manifest") % f
                        )
                    l = b""
                if dstart is not None and dstart <= start and dend >= start:
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    if dstart is not None:
                        delta.append([dstart, dend, b"".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, b"".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = bytearray(self.text())
            deltatext = mdiff.textdiff(
                util.buffer(base), util.buffer(arraytext)
            )

        return arraytext, deltatext


def _msearch(m, s, lo=0, hi=None):
    """return a tuple (start, end) that says where to find s within m.

    If the string is found, m[start:end] is the line containing
    that string. If start == end the string was not found and
    they indicate the proper sorted insertion point.

    m should be a buffer, a memoryview or a byte string.
    s is a byte string"""

    def advance(i, c):
        while i < lenm and m[i : i + 1] != c:
            i += 1
        return i

    if not s:
        return (lo, lo)
    lenm = len(m)
    if not hi:
        hi = lenm
    while lo < hi:
        mid = (lo + hi) // 2
        start = mid
        while start > 0 and m[start - 1 : start] != b'\n':
            start -= 1
        end = advance(start, b'\0')
        if bytes(m[start:end]) < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = advance(end + 40, b'\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = start
    end = advance(lo, b'\0')
    found = m[lo:end]
    if s == found:
        # we know that after the null there are 40 bytes of sha1
        end = advance(end + 40, b'\n')
        return (lo, end + 1)
    else:
        return (lo, lo)


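# Illustrative sketch (not part of the original module): _msearch() binary
# searches the raw manifest text line by line. With two entries whose nodes
# are 40 hex characters each:
#
#   text = b'a.txt\x00' + b'11' * 20 + b'\nb.txt\x00' + b'22' * 20 + b'\n'
#   _msearch(text, b'b.txt')   # -> (start, end) spanning the b.txt line
#   _msearch(text, b'c.txt')   # -> (len(text), len(text)), the insert point

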
def _checkforbidden(l):
    """Check filenames for illegal characters."""
    for f in l:
        if b'\n' in f or b'\r' in f:
            raise error.StorageError(
                _(b"'\\n' and '\\r' disallowed in filenames: %r")
                % pycompat.bytestr(f)
            )


# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    currentposition = 0
    newaddlist = bytearray()

    for start, end, content in x:
        newaddlist += addlist[currentposition:start]
        if content:
            newaddlist += bytearray(content)

        currentposition = end

    newaddlist += addlist[currentposition:]

    deltatext = b"".join(
        struct.pack(b">lll", start, end, len(content)) + content
        for start, end, content in x
    )
    return deltatext, newaddlist


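# Illustrative sketch (not part of the original module): each record in the
# deltatext built above is a 12-byte big-endian header followed by the
# replacement bytes, i.e. struct.pack(b'>lll', start, end, len(content)).
# Splicing b'new line\n' over bytes 0..10 of the base text would be encoded
# as:
#
#   content = b'new line\n'
#   record = struct.pack(b'>lll', 0, 10, len(content)) + content

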
def _splittopdir(f):
    if b'/' in f:
        dir, subpath = f.split(b'/', 1)
        return dir + b'/', subpath
    else:
        return b'', f


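# Illustrative sketch (not part of the original module): _splittopdir()
# peels off the first path component, keeping the trailing slash:
#
#   _splittopdir(b'src/lib/a.py')   # -> (b'src/', b'lib/a.py')
#   _splittopdir(b'a.py')           # -> (b'', b'a.py')

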
_noop = lambda s: None


@interfaceutil.implementer(repository.imanifestdict)
class treemanifest:
    def __init__(self, nodeconstants, dir=b'', text=b''):
        self._dir = dir
        self.nodeconstants = nodeconstants
        self._node = self.nodeconstants.nullid
        self._nodelen = self.nodeconstants.nodelen
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        self._lazydirs = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:

            def readsubtree(subdir, subm):
                raise AssertionError(
                    b'treemanifest constructor only accepts flat manifests'
                )

            self.parse(text, readsubtree)
            self._dirty = True  # Mark flat manifest dirty after parsing

    def _subpath(self, path):
        return self._dir + path

    def _loadalllazy(self):
        selfdirs = self._dirs
        subpath = self._subpath
        for d, (node, readsubtree, docopy) in self._lazydirs.items():
            if docopy:
                selfdirs[d] = readsubtree(subpath(d), node).copy()
            else:
                selfdirs[d] = readsubtree(subpath(d), node)
        self._lazydirs = {}

    def _loadlazy(self, d):
        v = self._lazydirs.get(d)
        if v:
            node, readsubtree, docopy = v
            if docopy:
                self._dirs[d] = readsubtree(self._subpath(d), node).copy()
            else:
                self._dirs[d] = readsubtree(self._subpath(d), node)
            del self._lazydirs[d]

    def _loadchildrensetlazy(self, visit):
        if not visit:
            return None
        if visit == b'all' or visit == b'this':
            self._loadalllazy()
            return None

        loadlazy = self._loadlazy
        for k in visit:
            loadlazy(k + b'/')
        return visit

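    # Illustrative sketch (not part of the original module): the `visit`
    # values handled above come from match.visitchildrenset(), which returns
    # b'all' (descend everywhere), b'this' (only this directory's entries),
    # a set of child names to descend into, or a falsy value (nothing to
    # visit). For a set, only the named lazy subtrees are parsed:
    #
    #   self._loadchildrensetlazy({b'src'})   # loads only _lazydirs[b'src/']
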
    def _loaddifflazy(self, t1, t2):
        """load items in t1 and t2 if they're needed for diffing.

        The current criteria are:
        - if it's not present in _lazydirs in either t1 or t2, load it in the
          other (it may already be loaded or it may not exist, doesn't matter)
        - if it's present in _lazydirs in both, compare the nodeid; if it
          differs, load it in both
        """
        toloadlazy = []
        for d, v1 in t1._lazydirs.items():
            v2 = t2._lazydirs.get(d)
            if not v2 or v2[0] != v1[0]:
                toloadlazy.append(d)
        for d, v1 in t2._lazydirs.items():
            if d not in t1._lazydirs:
                toloadlazy.append(d)

        for d in toloadlazy:
            t1._loadlazy(d)
            t2._loadlazy(d)

    def __len__(self):
        self._load()
        size = len(self._files)
        self._loadalllazy()
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def __nonzero__(self):
        # Faster than "__len__() != 0" since it avoids loading sub-manifests
        return not self._isempty()

    __bool__ = __nonzero__

    def _isempty(self):
        self._load()  # for consistency; already loaded by all callers
        # See if we can skip loading everything.
        if self._files or (
            self._dirs and any(not m._isempty() for m in self._dirs.values())
        ):
            return False
        self._loadalllazy()
        return not self._dirs or all(m._isempty() for m in self._dirs.values())

    @encoding.strmethod
    def __repr__(self):
        return (
            b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
            % (
                self._dir,
                hex(self._node),
                bool(self._loadfunc is _noop),
                self._dirty,
                id(self),
            )
        )

    def dir(self):
        """The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory."""
        return self._dir

    def node(self):
        """The node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read from or written to a revlog.
        """
        assert not self._dirty
        return self._node

    def setnode(self, node):
        self._node = node
        self._dirty = False

    def iterentries(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, b'')
            else:
                for x in n.iterentries():
                    yield x

    def items(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n
            else:
                for f, sn in n.items():
                    yield f, sn

    iteritems = items

    def iterkeys(self):
        self._load()
        self._loadalllazy()
        for p in sorted(itertools.chain(self._dirs, self._files)):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p]:
                    yield f

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __contains__(self, f):
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return False

            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f, default=None):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return b''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._lazydirs or f in self._dirs:
                return b''
            return self._flags.get(f, b'')

    def find(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, b'')

    def __delitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def __setitem__(self, f, n):
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].__setitem__(subpath, n)
        else:
            # manifest nodes are either 20 bytes or 32 bytes,
            # depending on the hash in use. Assert this as historically
            # sometimes extra bytes were added.
            assert len(n) in (20, 32)
            self._files[f] = n
        self._dirty = True

    def _load(self):
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self):
        copy = treemanifest(self.nodeconstants, self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:

            def _copyfunc(s):
                self._load()
                s._lazydirs = {
                    d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
                }
                sdirs = s._dirs
                for d, v in self._dirs.items():
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)

            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.filesnotin(m2)

        files = set()

        def _filesnotin(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in t1._dirs.items():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self):
        return pathutil.dirs(self)

    def dirs(self):
        return self._alldirs

    def hasdir(self, dir):
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            self._loadlazy(topdir)
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        dirslash = dir + b'/'
        return dirslash in self._dirs or dirslash in self._lazydirs

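    # Illustrative sketch (not part of the original module), assuming a node
    # constants object such as mercurial.node.sha1nodeconstants:
    #
    #   t = treemanifest(sha1nodeconstants)
    #   t[b'src/lib/a.py'] = b'\x11' * 20
    #   t.hasdir(b'src')       # -> True
    #   t.hasdir(b'src/lib')   # -> True
    #   t.hasdir(b'docs')      # -> False
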
    def walk(self, match):
        """Generates matching file names.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match):
        '''Recursively generates matching file names for walk().'''
        visit = match.visitchildrenset(self._dir[:-1])
        if not visit:
            return

        # yield this dir's files and walk its submanifests
        self._load()
        visit = self._loadchildrensetlazy(visit)
        for p in sorted(list(self._dirs) + list(self._files)):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                if not visit or p[:-1] in visit:
                    for f in self._dirs[p]._walk(match):
                        yield f

    def _matches(self, match):
        """recursively generate a new manifest filtered by the match argument."""
        if match.always():
            return self.copy()
        return self._matches_inner(match)

    def _matches_inner(self, match):
        if match.always():
            return self.copy()

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == b'all':
            return self.copy()
        ret = treemanifest(self.nodeconstants, self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != b'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in self._dirs.items():
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches_inner(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def fastdelta(self, base, changes):
        raise FastdeltaUnavailable()

    def diff(self, m2, match=None, clean=False):
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest(self.nodeconstants)

        def _iterativediff(t1, t2, stack):
            """compares two tree manifests and appends to the stack any new
            tree manifests that need to be compared"""
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in t1._dirs.items():
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in t2._dirs.items():
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in t1._files.items():
                fl1 = t1._flags.get(fn, b'')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, b'')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.items():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, b'')
                    result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result

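    # Illustrative sketch (not part of the original module): the explicit
    # stack above replaces recursion, and subtrees whose nodes match and are
    # not dirty are skipped wholesale, which is what makes tree manifest
    # diffs cheap on large repositories. Result keys are full paths:
    #
    #   t1.diff(t2)   # -> {b'src/a.py': ((n1, b''), (n2, b''))} when only
    #                 #    src/a.py differs between the two trees
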
    def unmodifiedsince(self, m2):
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(self, text, readsubtree):
        selflazy = self._lazydirs
        for f, n, fl in _parse(self._nodelen, text):
            if fl == b't':
                f = f + b'/'
                # False below means "doesn't need to be copied" and can use the
                # cached value from readsubtree directly.
                selflazy[f] = (n, readsubtree, False)
            elif b'/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl

    def text(self):
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries())

    def dirtext(self):
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
        dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files + lazydirs))

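    # Illustrative sketch (not part of the original module): in dirtext()
    # output a subdirectory appears as one entry flagged b't', pointing at
    # the submanifest's node, while files keep their usual flags:
    #
    #   b'src\x00' + b'aa' * 20 + b't\n'
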
1365 def read(self, gettext, readsubtree):
1365 def read(self, gettext, readsubtree):
1366 def _load_for_read(s):
1366 def _load_for_read(s):
1367 s.parse(gettext(), readsubtree)
1367 s.parse(gettext(), readsubtree)
1368 s._dirty = False
1368 s._dirty = False
1369
1369
1370 self._loadfunc = _load_for_read
1370 self._loadfunc = _load_for_read
1371
1371
1372 def writesubtrees(self, m1, m2, writesubtree, match):
1372 def writesubtrees(self, m1, m2, writesubtree, match):
1373 self._load() # for consistency; should never have any effect here
1373 self._load() # for consistency; should never have any effect here
1374 m1._load()
1374 m1._load()
1375 m2._load()
1375 m2._load()
1376 emptytree = treemanifest(self.nodeconstants)
1376 emptytree = treemanifest(self.nodeconstants)
1377
1377
1378 def getnode(m, d):
1378 def getnode(m, d):
1379 ld = m._lazydirs.get(d)
1379 ld = m._lazydirs.get(d)
1380 if ld:
1380 if ld:
1381 return ld[0]
1381 return ld[0]
1382 return m._dirs.get(d, emptytree)._node
1382 return m._dirs.get(d, emptytree)._node
1383
1383
1384 # let's skip investigating things that `match` says we do not need.
1384 # let's skip investigating things that `match` says we do not need.
1385 visit = match.visitchildrenset(self._dir[:-1])
1385 visit = match.visitchildrenset(self._dir[:-1])
1386 visit = self._loadchildrensetlazy(visit)
1386 visit = self._loadchildrensetlazy(visit)
1387 if visit == b'this' or visit == b'all':
1387 if visit == b'this' or visit == b'all':
1388 visit = None
1388 visit = None
1389 for d, subm in self._dirs.items():
1389 for d, subm in self._dirs.items():
1390 if visit and d[:-1] not in visit:
1390 if visit and d[:-1] not in visit:
1391 continue
1391 continue
1392 subp1 = getnode(m1, d)
1392 subp1 = getnode(m1, d)
1393 subp2 = getnode(m2, d)
1393 subp2 = getnode(m2, d)
1394 if subp1 == self.nodeconstants.nullid:
1394 if subp1 == self.nodeconstants.nullid:
1395 subp1, subp2 = subp2, subp1
1395 subp1, subp2 = subp2, subp1
1396 writesubtree(subm, subp1, subp2, match)
1396 writesubtree(subm, subp1, subp2, match)
1397
1397
1398 def walksubtrees(self, matcher=None):
1398 def walksubtrees(self, matcher=None):
1399 """Returns an iterator of the subtrees of this manifest, including this
1399 """Returns an iterator of the subtrees of this manifest, including this
1400 manifest itself.
1400 manifest itself.
1401
1401
1402 If `matcher` is provided, it only returns subtrees that match.
1402 If `matcher` is provided, it only returns subtrees that match.
1403 """
1403 """
1404 if matcher and not matcher.visitdir(self._dir[:-1]):
1404 if matcher and not matcher.visitdir(self._dir[:-1]):
1405 return
1405 return
1406 if not matcher or matcher(self._dir[:-1]):
1406 if not matcher or matcher(self._dir[:-1]):
1407 yield self
1407 yield self
1408
1408
1409 self._load()
1409 self._load()
1410 # OPT: use visitchildrenset to avoid loading everything.
1410 # OPT: use visitchildrenset to avoid loading everything.
1411 self._loadalllazy()
1411 self._loadalllazy()
1412 for d, subm in self._dirs.items():
1412 for d, subm in self._dirs.items():
1413 for subtree in subm.walksubtrees(matcher=matcher):
1413 for subtree in subm.walksubtrees(matcher=matcher):
1414 yield subtree
1414 yield subtree
1415
1415
1416
1416
1417 class manifestfulltextcache(util.lrucachedict):
1417 class manifestfulltextcache(util.lrucachedict):
1418 """File-backed LRU cache for the manifest cache
1418 """File-backed LRU cache for the manifest cache
1419
1419
1420 The file consists of entries, up to EOF:
1420 The file consists of entries, up to EOF:
1421
1421
1422 - 20-byte node, 4-byte length, <length> manifest data
1422 - 20-byte node, 4-byte length, <length> manifest data
1423
1423
1424 These are written in reverse cache order (oldest to newest).
1424 These are written in reverse cache order (oldest to newest).
1425
1425
1426 """
1426 """
1427
1427
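To make the record layout described in the docstring concrete, a standalone decoder for a single entry might look like the following sketch (a hypothetical helper, mirroring the tolerant parsing in read() below):

import struct

def read_one_entry(fp):
    # One record: 20-byte node, 4-byte big-endian length, <length> bytes.
    node = fp.read(20)
    if len(node) < 20:
        return None                      # EOF or truncated node
    try:
        (size,) = struct.unpack(b'>L', fp.read(4))
    except struct.error:
        return None                      # truncated length field
    data = fp.read(size)
    if len(data) != size:
        return None                      # truncated payload, dropped
    return node, data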
1428 _file = b'manifestfulltextcache'
1428 _file = b'manifestfulltextcache'
1429
1429
1430 def __init__(self, max):
1430 def __init__(self, max):
1431 super(manifestfulltextcache, self).__init__(max)
1431 super(manifestfulltextcache, self).__init__(max)
1432 self._dirty = False
1432 self._dirty = False
1433 self._read = False
1433 self._read = False
1434 self._opener = None
1434 self._opener = None
1435
1435
1436 def read(self):
1436 def read(self):
1437 if self._read or self._opener is None:
1437 if self._read or self._opener is None:
1438 return
1438 return
1439
1439
1440 try:
1440 try:
1441 with self._opener(self._file) as fp:
1441 with self._opener(self._file) as fp:
1442 set = super(manifestfulltextcache, self).__setitem__
1442 set = super(manifestfulltextcache, self).__setitem__
1443 # ignore trailing data; this is a cache, so corruption is skipped
1443 # ignore trailing data; this is a cache, so corruption is skipped
1444 while True:
1444 while True:
1445 # TODO do we need to do work here for sha1 portability?
1445 # TODO do we need to do work here for sha1 portability?
1446 node = fp.read(20)
1446 node = fp.read(20)
1447 if len(node) < 20:
1447 if len(node) < 20:
1448 break
1448 break
1449 try:
1449 try:
1450 size = struct.unpack(b'>L', fp.read(4))[0]
1450 size = struct.unpack(b'>L', fp.read(4))[0]
1451 except struct.error:
1451 except struct.error:
1452 break
1452 break
1453 value = bytearray(fp.read(size))
1453 value = bytearray(fp.read(size))
1454 if len(value) != size:
1454 if len(value) != size:
1455 break
1455 break
1456 set(node, value)
1456 set(node, value)
1457 except IOError:
1457 except IOError:
1458 # the file is allowed to be missing
1458 # the file is allowed to be missing
1459 pass
1459 pass
1460
1460
1461 self._read = True
1461 self._read = True
1462 self._dirty = False
1462 self._dirty = False
1463
1463
1464 def write(self):
1464 def write(self):
1465 if not self._dirty or self._opener is None:
1465 if not self._dirty or self._opener is None:
1466 return
1466 return
1467 # rotate backwards to the first used node
1467 # rotate backwards to the first used node
1468 try:
1468 try:
1469 with self._opener(
1469 with self._opener(
1470 self._file, b'w', atomictemp=True, checkambig=True
1470 self._file, b'w', atomictemp=True, checkambig=True
1471 ) as fp:
1471 ) as fp:
1472 node = self._head.prev
1472 node = self._head.prev
1473 while True:
1473 while True:
1474 if node.key in self._cache:
1474 if node.key in self._cache:
1475 fp.write(node.key)
1475 fp.write(node.key)
1476 fp.write(struct.pack(b'>L', len(node.value)))
1476 fp.write(struct.pack(b'>L', len(node.value)))
1477 fp.write(node.value)
1477 fp.write(node.value)
1478 if node is self._head:
1478 if node is self._head:
1479 break
1479 break
1480 node = node.prev
1480 node = node.prev
1481 except IOError:
1481 except IOError:
1482 # We could not write the cache (eg: permission error)
1482 # We could not write the cache (eg: permission error)
1483 # the content can be missing.
1483 # the content can be missing.
1484 #
1484 #
1485 # We could try harder and see if we could recreate a wcache
1485 # We could try harder and see if we could recreate a wcache
1486 # directory where we could write to.
1486 # directory where we could write to.
1487 #
1487 #
1488 # XXX the error passes silently; having some way to issue an error
1488 # XXX the error passes silently; having some way to issue an error
1489 # log via `ui.log` would be nice.
1489 # log via `ui.log` would be nice.
1490 pass
1490 pass
1491
1491
1492 def __len__(self):
1492 def __len__(self):
1493 if not self._read:
1493 if not self._read:
1494 self.read()
1494 self.read()
1495 return super(manifestfulltextcache, self).__len__()
1495 return super(manifestfulltextcache, self).__len__()
1496
1496
1497 def __contains__(self, k):
1497 def __contains__(self, k):
1498 if not self._read:
1498 if not self._read:
1499 self.read()
1499 self.read()
1500 return super(manifestfulltextcache, self).__contains__(k)
1500 return super(manifestfulltextcache, self).__contains__(k)
1501
1501
1502 def __iter__(self):
1502 def __iter__(self):
1503 if not self._read:
1503 if not self._read:
1504 self.read()
1504 self.read()
1505 return super(manifestfulltextcache, self).__iter__()
1505 return super(manifestfulltextcache, self).__iter__()
1506
1506
1507 def __getitem__(self, k):
1507 def __getitem__(self, k):
1508 if not self._read:
1508 if not self._read:
1509 self.read()
1509 self.read()
1510 # the cache lru order can change on read
1510 # the cache lru order can change on read
1511 setdirty = self._cache.get(k) is not self._head
1511 setdirty = self._cache.get(k) is not self._head
1512 value = super(manifestfulltextcache, self).__getitem__(k)
1512 value = super(manifestfulltextcache, self).__getitem__(k)
1513 if setdirty:
1513 if setdirty:
1514 self._dirty = True
1514 self._dirty = True
1515 return value
1515 return value
1516
1516
1517 def __setitem__(self, k, v):
1517 def __setitem__(self, k, v):
1518 if not self._read:
1518 if not self._read:
1519 self.read()
1519 self.read()
1520 super(manifestfulltextcache, self).__setitem__(k, v)
1520 super(manifestfulltextcache, self).__setitem__(k, v)
1521 self._dirty = True
1521 self._dirty = True
1522
1522
1523 def __delitem__(self, k):
1523 def __delitem__(self, k):
1524 if not self._read:
1524 if not self._read:
1525 self.read()
1525 self.read()
1526 super(manifestfulltextcache, self).__delitem__(k)
1526 super(manifestfulltextcache, self).__delitem__(k)
1527 self._dirty = True
1527 self._dirty = True
1528
1528
1529 def get(self, k, default=None):
1529 def get(self, k, default=None):
1530 if not self._read:
1530 if not self._read:
1531 self.read()
1531 self.read()
1532 return super(manifestfulltextcache, self).get(k, default=default)
1532 return super(manifestfulltextcache, self).get(k, default=default)
1533
1533
1534 def clear(self, clear_persisted_data=False):
1534 def clear(self, clear_persisted_data=False):
1535 super(manifestfulltextcache, self).clear()
1535 super(manifestfulltextcache, self).clear()
1536 if clear_persisted_data:
1536 if clear_persisted_data:
1537 self._dirty = True
1537 self._dirty = True
1538 self.write()
1538 self.write()
1539 self._read = False
1539 self._read = False
1540
1540
1541
1541
1542 # an upper bound of what we expect from compression
1542 # an upper bound of what we expect from compression
1543 # (the real-life value seems to be "3")
1543 # (the real-life value seems to be "3")
1544 MAXCOMPRESSION = 3
1544 MAXCOMPRESSION = 3
1545
1545
1546
1546
1547 class FastdeltaUnavailable(Exception):
1547 class FastdeltaUnavailable(Exception):
1548 """Exception raised when fastdelta isn't usable on a manifest."""
1548 """Exception raised when fastdelta isn't usable on a manifest."""
1549
1549
1550
1550
1551 @interfaceutil.implementer(repository.imanifeststorage)
1551 @interfaceutil.implementer(repository.imanifeststorage)
1552 class manifestrevlog:
1552 class manifestrevlog:
1553 """A revlog that stores manifest texts. This is responsible for caching the
1553 """A revlog that stores manifest texts. This is responsible for caching the
1554 full-text manifest contents.
1554 full-text manifest contents.
1555 """
1555 """
1556
1556
1557 def __init__(
1557 def __init__(
1558 self,
1558 self,
1559 nodeconstants,
1559 nodeconstants,
1560 opener,
1560 opener,
1561 tree=b'',
1561 tree=b'',
1562 dirlogcache=None,
1562 dirlogcache=None,
1563 treemanifest=False,
1563 treemanifest=False,
1564 ):
1564 ):
1565 """Constructs a new manifest revlog
1565 """Constructs a new manifest revlog
1566
1566
1567 `indexfile` - used by extensions to have two manifests at once, like
1567 `indexfile` - used by extensions to have two manifests at once, like
1568 when transitioning between flat manifests and treemanifests.
1568 when transitioning between flat manifests and treemanifests.
1569
1569
1570 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1570 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1571 options can also be used to make this a tree manifest revlog. The opener
1571 options can also be used to make this a tree manifest revlog. The opener
1572 option takes precedence, so if it is set to True, we ignore whatever
1572 option takes precedence, so if it is set to True, we ignore whatever
1573 value is passed in to the constructor.
1573 value is passed in to the constructor.
1574 """
1574 """
1575 self.nodeconstants = nodeconstants
1575 self.nodeconstants = nodeconstants
1576 # During normal operations, we expect to deal with not more than four
1576 # During normal operations, we expect to deal with not more than four
1577 # revs at a time (such as during commit --amend). When rebasing large
1577 # revs at a time (such as during commit --amend). When rebasing large
1578 # stacks of commits, the number can go up, hence the config knob below.
1578 # stacks of commits, the number can go up, hence the config knob below.
1579 cachesize = 4
1579 cachesize = 4
1580 optiontreemanifest = False
1580 optiontreemanifest = False
1581 opts = getattr(opener, 'options', None)
1581 opts = getattr(opener, 'options', None)
1582 if opts is not None:
1582 if opts is not None:
1583 cachesize = opts.get(b'manifestcachesize', cachesize)
1583 cachesize = opts.get(b'manifestcachesize', cachesize)
1584 optiontreemanifest = opts.get(b'treemanifest', False)
1584 optiontreemanifest = opts.get(b'treemanifest', False)
1585
1585
1586 self._treeondisk = optiontreemanifest or treemanifest
1586 self._treeondisk = optiontreemanifest or treemanifest
1587
1587
1588 self._fulltextcache = manifestfulltextcache(cachesize)
1588 self._fulltextcache = manifestfulltextcache(cachesize)
1589
1589
1590 if tree:
1590 if tree:
1591 assert self._treeondisk, (tree, b'opts is %r' % opts)
1591 assert self._treeondisk, (tree, b'opts is %r' % opts)
1592
1592
1593 radix = b'00manifest'
1593 radix = b'00manifest'
1594 if tree:
1594 if tree:
1595 radix = b"meta/" + tree + radix
1595 radix = b"meta/" + tree + radix
1596
1596
1597 self.tree = tree
1597 self.tree = tree
1598
1598
1599 # The dirlogcache is kept on the root manifest log
1599 # The dirlogcache is kept on the root manifest log
1600 if tree:
1600 if tree:
1601 self._dirlogcache = dirlogcache
1601 self._dirlogcache = dirlogcache
1602 else:
1602 else:
1603 self._dirlogcache = {b'': self}
1603 self._dirlogcache = {b'': self}
1604
1604
1605 self._revlog = revlog.revlog(
1605 self._revlog = revlog.revlog(
1606 opener,
1606 opener,
1607 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1607 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1608 radix=radix,
1608 radix=radix,
1609 # only root indexfile is cached
1609 # only root indexfile is cached
1610 checkambig=not bool(tree),
1610 checkambig=not bool(tree),
1611 mmaplargeindex=True,
1611 mmaplargeindex=True,
1612 upperboundcomp=MAXCOMPRESSION,
1612 upperboundcomp=MAXCOMPRESSION,
1613 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1613 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1614 )
1614 )
1615
1615
1616 self.index = self._revlog.index
1616 self.index = self._revlog.index
1617 self._generaldelta = self._revlog._generaldelta
1617 self._generaldelta = self._revlog._generaldelta
1618
1618
1619 def get_revlog(self):
1619 def get_revlog(self):
1620 """return an actual revlog instance if any
1620 """return an actual revlog instance if any
1621
1621
1622 This exists because a lot of code leverages the fact that the underlying
1622 This exists because a lot of code leverages the fact that the underlying
1623 storage is a revlog for optimization, so giving a simple way to access
1623 storage is a revlog for optimization, so giving a simple way to access
1624 the revlog instance helps such code.
1624 the revlog instance helps such code.
1625 """
1625 """
1626 return self._revlog
1626 return self._revlog
1627
1627
1628 def _setupmanifestcachehooks(self, repo):
1628 def _setupmanifestcachehooks(self, repo):
1629 """Persist the manifestfulltextcache on lock release"""
1629 """Persist the manifestfulltextcache on lock release"""
1630 if not hasattr(repo, '_wlockref'):
1630 if not hasattr(repo, '_wlockref'):
1631 return
1631 return
1632
1632
1633 self._fulltextcache._opener = repo.wcachevfs
1633 self._fulltextcache._opener = repo.wcachevfs
1634 if repo._currentlock(repo._wlockref) is None:
1634 if repo._currentlock(repo._wlockref) is None:
1635 return
1635 return
1636
1636
1637 reporef = weakref.ref(repo)
1637 reporef = weakref.ref(repo)
1638 manifestrevlogref = weakref.ref(self)
1638 manifestrevlogref = weakref.ref(self)
1639
1639
1640 def persistmanifestcache(success):
1640 def persistmanifestcache(success):
1641 # Repo is in an unknown state, do not persist.
1641 # Repo is in an unknown state, do not persist.
1642 if not success:
1642 if not success:
1643 return
1643 return
1644
1644
1645 repo = reporef()
1645 repo = reporef()
1646 self = manifestrevlogref()
1646 self = manifestrevlogref()
1647 if repo is None or self is None:
1647 if repo is None or self is None:
1648 return
1648 return
1649 if repo.manifestlog.getstorage(b'') is not self:
1649 if repo.manifestlog.getstorage(b'') is not self:
1650 # there's a different manifest in play now, abort
1650 # there's a different manifest in play now, abort
1651 return
1651 return
1652 self._fulltextcache.write()
1652 self._fulltextcache.write()
1653
1653
1654 repo._afterlock(persistmanifestcache)
1654 repo._afterlock(persistmanifestcache)
1655
1655
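A minimal sketch of the weak-reference pattern used by persistmanifestcache() above (names hypothetical): the hook keeps no strong reference, so registering it cannot keep the repository alive, and a dead reference simply turns the hook into a no-op.

import weakref

class Repo:
    pass

repo = Repo()
reporef = weakref.ref(repo)

def persist(success):
    r = reporef()                  # None once the repo has been collected
    if not success or r is None:   # failed transaction or dead repo: no-op
        return
    print('would write cache for', r)

persist(True)                      # writes
del repo                           # no strong refs left; CPython frees it
persist(True)                      # silently does nothing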
1656 @property
1656 @property
1657 def fulltextcache(self):
1657 def fulltextcache(self):
1658 return self._fulltextcache
1658 return self._fulltextcache
1659
1659
1660 def clearcaches(self, clear_persisted_data=False):
1660 def clearcaches(self, clear_persisted_data=False):
1661 self._revlog.clearcaches()
1661 self._revlog.clearcaches()
1662 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1662 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1663 self._dirlogcache = {self.tree: self}
1663 self._dirlogcache = {self.tree: self}
1664
1664
1665 def dirlog(self, d):
1665 def dirlog(self, d):
1666 if d:
1666 if d:
1667 assert self._treeondisk
1667 assert self._treeondisk
1668 if d not in self._dirlogcache:
1668 if d not in self._dirlogcache:
1669 mfrevlog = manifestrevlog(
1669 mfrevlog = manifestrevlog(
1670 self.nodeconstants,
1670 self.nodeconstants,
1671 self.opener,
1671 self.opener,
1672 d,
1672 d,
1673 self._dirlogcache,
1673 self._dirlogcache,
1674 treemanifest=self._treeondisk,
1674 treemanifest=self._treeondisk,
1675 )
1675 )
1676 self._dirlogcache[d] = mfrevlog
1676 self._dirlogcache[d] = mfrevlog
1677 return self._dirlogcache[d]
1677 return self._dirlogcache[d]
1678
1678
1679 def add(
1679 def add(
1680 self,
1680 self,
1681 m,
1681 m,
1682 transaction,
1682 transaction,
1683 link,
1683 link,
1684 p1,
1684 p1,
1685 p2,
1685 p2,
1686 added,
1686 added,
1687 removed,
1687 removed,
1688 readtree=None,
1688 readtree=None,
1689 match=None,
1689 match=None,
1690 ):
1690 ):
1691 """add some manifest entry in to the manifest log
1691 """add some manifest entry in to the manifest log
1692
1692
1693 input:
1693 input:
1694
1694
1695 m: the manifest dict we want to store
1695 m: the manifest dict we want to store
1696 transaction: the open transaction
1696 transaction: the open transaction
1697 p1: manifest-node of p1
1697 p1: manifest-node of p1
1698 p2: manifest-node of p2
1698 p2: manifest-node of p2
1699 added: files added/changed compared to the parent
1699 added: files added/changed compared to the parent
1700 removed: files removed compared to the parent
1700 removed: files removed compared to the parent
1701
1701
1702 tree manifest input:
1702 tree manifest input:
1703
1703
1704 readtree: a function to read a subtree
1704 readtree: a function to read a subtree
1705 match: a filematcher for the subpart of the tree manifest
1705 match: a filematcher for the subpart of the tree manifest
1706 """
1706 """
1707 try:
1707 try:
1708 if p1 not in self.fulltextcache:
1708 if p1 not in self.fulltextcache:
1709 raise FastdeltaUnavailable()
1709 raise FastdeltaUnavailable()
1710 # If our first parent is in the manifest cache, we can
1710 # If our first parent is in the manifest cache, we can
1711 # compute a delta here using properties we know about the
1711 # compute a delta here using properties we know about the
1712 # manifest up-front, which may save time later for the
1712 # manifest up-front, which may save time later for the
1713 # revlog layer.
1713 # revlog layer.
1714
1714
1715 _checkforbidden(added)
1715 _checkforbidden(added)
1716 # combine the changed lists into one sorted iterator
1716 # combine the changed lists into one sorted iterator
1717 work = heapq.merge(
1717 work = heapq.merge(
1718 [(x, False) for x in sorted(added)],
1718 [(x, False) for x in sorted(added)],
1719 [(x, True) for x in sorted(removed)],
1719 [(x, True) for x in sorted(removed)],
1720 )
1720 )
1721
1721
1722 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1722 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1723 cachedelta = self._revlog.rev(p1), deltatext
1723 cachedelta = self._revlog.rev(p1), deltatext
1724 text = util.buffer(arraytext)
1724 text = util.buffer(arraytext)
1725 rev = self._revlog.addrevision(
1725 rev = self._revlog.addrevision(
1726 text, transaction, link, p1, p2, cachedelta
1726 text, transaction, link, p1, p2, cachedelta
1727 )
1727 )
1728 n = self._revlog.node(rev)
1728 n = self._revlog.node(rev)
1729 except FastdeltaUnavailable:
1729 except FastdeltaUnavailable:
1730 # The first parent manifest isn't already loaded or the
1730 # The first parent manifest isn't already loaded or the
1731 # manifest implementation doesn't support fastdelta, so
1731 # manifest implementation doesn't support fastdelta, so
1732 # we'll just encode a fulltext of the manifest and pass
1732 # we'll just encode a fulltext of the manifest and pass
1733 # that through to the revlog layer, and let it handle the
1733 # that through to the revlog layer, and let it handle the
1734 # delta process.
1734 # delta process.
1735 if self._treeondisk:
1735 if self._treeondisk:
1736 assert readtree, b"readtree must be set for treemanifest writes"
1736 assert readtree, b"readtree must be set for treemanifest writes"
1737 assert match, b"match must be specified for treemanifest writes"
1737 assert match, b"match must be specified for treemanifest writes"
1738 m1 = readtree(self.tree, p1)
1738 m1 = readtree(self.tree, p1)
1739 m2 = readtree(self.tree, p2)
1739 m2 = readtree(self.tree, p2)
1740 n = self._addtree(
1740 n = self._addtree(
1741 m, transaction, link, m1, m2, readtree, match=match
1741 m, transaction, link, m1, m2, readtree, match=match
1742 )
1742 )
1743 arraytext = None
1743 arraytext = None
1744 else:
1744 else:
1745 text = m.text()
1745 text = m.text()
1746 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1746 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1747 n = self._revlog.node(rev)
1747 n = self._revlog.node(rev)
1748 arraytext = bytearray(text)
1748 arraytext = bytearray(text)
1749
1749
1750 if arraytext is not None:
1750 if arraytext is not None:
1751 self.fulltextcache[n] = arraytext
1751 self.fulltextcache[n] = arraytext
1752
1752
1753 return n
1753 return n
1754
1754
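For illustration, the `work` iterator built in the fastdelta path above interleaves two pre-sorted lists into one sorted stream of (path, was_removed) pairs; a self-contained sketch with made-up paths:

import heapq

added = [b'b.txt', b'a.txt']
removed = [b'c.txt']
work = heapq.merge(
    [(x, False) for x in sorted(added)],
    [(x, True) for x in sorted(removed)],
)
assert list(work) == [(b'a.txt', False), (b'b.txt', False), (b'c.txt', True)]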
1755 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1755 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1756 # If the manifest is unchanged compared to one parent,
1756 # If the manifest is unchanged compared to one parent,
1757 # don't write a new revision
1757 # don't write a new revision
1758 if self.tree != b'' and (
1758 if self.tree != b'' and (
1759 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1759 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1760 ):
1760 ):
1761 return m.node()
1761 return m.node()
1762
1762
1763 def writesubtree(subm, subp1, subp2, match):
1763 def writesubtree(subm, subp1, subp2, match):
1764 sublog = self.dirlog(subm.dir())
1764 sublog = self.dirlog(subm.dir())
1765 sublog.add(
1765 sublog.add(
1766 subm,
1766 subm,
1767 transaction,
1767 transaction,
1768 link,
1768 link,
1769 subp1,
1769 subp1,
1770 subp2,
1770 subp2,
1771 None,
1771 None,
1772 None,
1772 None,
1773 readtree=readtree,
1773 readtree=readtree,
1774 match=match,
1774 match=match,
1775 )
1775 )
1776
1776
1777 m.writesubtrees(m1, m2, writesubtree, match)
1777 m.writesubtrees(m1, m2, writesubtree, match)
1778 text = m.dirtext()
1778 text = m.dirtext()
1779 n = None
1779 n = None
1780 if self.tree != b'':
1780 if self.tree != b'':
1781 # Double-check whether contents are unchanged to one parent
1781 # Double-check whether contents are unchanged to one parent
1782 if text == m1.dirtext():
1782 if text == m1.dirtext():
1783 n = m1.node()
1783 n = m1.node()
1784 elif text == m2.dirtext():
1784 elif text == m2.dirtext():
1785 n = m2.node()
1785 n = m2.node()
1786
1786
1787 if not n:
1787 if not n:
1788 rev = self._revlog.addrevision(
1788 rev = self._revlog.addrevision(
1789 text, transaction, link, m1.node(), m2.node()
1789 text, transaction, link, m1.node(), m2.node()
1790 )
1790 )
1791 n = self._revlog.node(rev)
1791 n = self._revlog.node(rev)
1792
1792
1793 # Save nodeid so parent manifest can calculate its nodeid
1793 # Save nodeid so parent manifest can calculate its nodeid
1794 m.setnode(n)
1794 m.setnode(n)
1795 return n
1795 return n
1796
1796
1797 def __len__(self):
1797 def __len__(self):
1798 return len(self._revlog)
1798 return len(self._revlog)
1799
1799
1800 def __iter__(self):
1800 def __iter__(self):
1801 return self._revlog.__iter__()
1801 return self._revlog.__iter__()
1802
1802
1803 def rev(self, node):
1803 def rev(self, node):
1804 return self._revlog.rev(node)
1804 return self._revlog.rev(node)
1805
1805
1806 def node(self, rev):
1806 def node(self, rev):
1807 return self._revlog.node(rev)
1807 return self._revlog.node(rev)
1808
1808
1809 def lookup(self, value):
1809 def lookup(self, value):
1810 return self._revlog.lookup(value)
1810 return self._revlog.lookup(value)
1811
1811
1812 def parentrevs(self, rev):
1812 def parentrevs(self, rev):
1813 return self._revlog.parentrevs(rev)
1813 return self._revlog.parentrevs(rev)
1814
1814
1815 def parents(self, node):
1815 def parents(self, node):
1816 return self._revlog.parents(node)
1816 return self._revlog.parents(node)
1817
1817
1818 def linkrev(self, rev):
1818 def linkrev(self, rev):
1819 return self._revlog.linkrev(rev)
1819 return self._revlog.linkrev(rev)
1820
1820
1821 def checksize(self):
1821 def checksize(self):
1822 return self._revlog.checksize()
1822 return self._revlog.checksize()
1823
1823
1824 def revision(self, node):
1824 def revision(self, node):
1825 return self._revlog.revision(node)
1825 return self._revlog.revision(node)
1826
1826
1827 def rawdata(self, node, _df=None):
1827 def rawdata(self, node):
1828 return self._revlog.rawdata(node, _df=_df)
1828 return self._revlog.rawdata(node)
1829
1829
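This hunk is the manifest-side counterpart of the change named in the commit message: the `_df` file-handle parameter is dropped from the delegating wrapper and from the call into the underlying revlog, presumably because file handles are now managed inside the revlog layer itself. In caller terms (hypothetical names):

# before: data = mfl.rawdata(node, _df=fh)   # caller threaded a file handle
# after:  data = mfl.rawdata(node)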
1830 def revdiff(self, rev1, rev2):
1830 def revdiff(self, rev1, rev2):
1831 return self._revlog.revdiff(rev1, rev2)
1831 return self._revlog.revdiff(rev1, rev2)
1832
1832
1833 def cmp(self, node, text):
1833 def cmp(self, node, text):
1834 return self._revlog.cmp(node, text)
1834 return self._revlog.cmp(node, text)
1835
1835
1836 def deltaparent(self, rev):
1836 def deltaparent(self, rev):
1837 return self._revlog.deltaparent(rev)
1837 return self._revlog.deltaparent(rev)
1838
1838
1839 def emitrevisions(
1839 def emitrevisions(
1840 self,
1840 self,
1841 nodes,
1841 nodes,
1842 nodesorder=None,
1842 nodesorder=None,
1843 revisiondata=False,
1843 revisiondata=False,
1844 assumehaveparentrevisions=False,
1844 assumehaveparentrevisions=False,
1845 deltamode=repository.CG_DELTAMODE_STD,
1845 deltamode=repository.CG_DELTAMODE_STD,
1846 sidedata_helpers=None,
1846 sidedata_helpers=None,
1847 debug_info=None,
1847 debug_info=None,
1848 ):
1848 ):
1849 return self._revlog.emitrevisions(
1849 return self._revlog.emitrevisions(
1850 nodes,
1850 nodes,
1851 nodesorder=nodesorder,
1851 nodesorder=nodesorder,
1852 revisiondata=revisiondata,
1852 revisiondata=revisiondata,
1853 assumehaveparentrevisions=assumehaveparentrevisions,
1853 assumehaveparentrevisions=assumehaveparentrevisions,
1854 deltamode=deltamode,
1854 deltamode=deltamode,
1855 sidedata_helpers=sidedata_helpers,
1855 sidedata_helpers=sidedata_helpers,
1856 debug_info=debug_info,
1856 debug_info=debug_info,
1857 )
1857 )
1858
1858
1859 def addgroup(
1859 def addgroup(
1860 self,
1860 self,
1861 deltas,
1861 deltas,
1862 linkmapper,
1862 linkmapper,
1863 transaction,
1863 transaction,
1864 alwayscache=False,
1864 alwayscache=False,
1865 addrevisioncb=None,
1865 addrevisioncb=None,
1866 duplicaterevisioncb=None,
1866 duplicaterevisioncb=None,
1867 debug_info=None,
1867 debug_info=None,
1868 delta_base_reuse_policy=None,
1868 delta_base_reuse_policy=None,
1869 ):
1869 ):
1870 return self._revlog.addgroup(
1870 return self._revlog.addgroup(
1871 deltas,
1871 deltas,
1872 linkmapper,
1872 linkmapper,
1873 transaction,
1873 transaction,
1874 alwayscache=alwayscache,
1874 alwayscache=alwayscache,
1875 addrevisioncb=addrevisioncb,
1875 addrevisioncb=addrevisioncb,
1876 duplicaterevisioncb=duplicaterevisioncb,
1876 duplicaterevisioncb=duplicaterevisioncb,
1877 debug_info=debug_info,
1877 debug_info=debug_info,
1878 delta_base_reuse_policy=delta_base_reuse_policy,
1878 delta_base_reuse_policy=delta_base_reuse_policy,
1879 )
1879 )
1880
1880
1881 def rawsize(self, rev):
1881 def rawsize(self, rev):
1882 return self._revlog.rawsize(rev)
1882 return self._revlog.rawsize(rev)
1883
1883
1884 def getstrippoint(self, minlink):
1884 def getstrippoint(self, minlink):
1885 return self._revlog.getstrippoint(minlink)
1885 return self._revlog.getstrippoint(minlink)
1886
1886
1887 def strip(self, minlink, transaction):
1887 def strip(self, minlink, transaction):
1888 return self._revlog.strip(minlink, transaction)
1888 return self._revlog.strip(minlink, transaction)
1889
1889
1890 def files(self):
1890 def files(self):
1891 return self._revlog.files()
1891 return self._revlog.files()
1892
1892
1893 def clone(self, tr, destrevlog, **kwargs):
1893 def clone(self, tr, destrevlog, **kwargs):
1894 if not isinstance(destrevlog, manifestrevlog):
1894 if not isinstance(destrevlog, manifestrevlog):
1895 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1895 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1896
1896
1897 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1897 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1898
1898
1899 def storageinfo(
1899 def storageinfo(
1900 self,
1900 self,
1901 exclusivefiles=False,
1901 exclusivefiles=False,
1902 sharedfiles=False,
1902 sharedfiles=False,
1903 revisionscount=False,
1903 revisionscount=False,
1904 trackedsize=False,
1904 trackedsize=False,
1905 storedsize=False,
1905 storedsize=False,
1906 ):
1906 ):
1907 return self._revlog.storageinfo(
1907 return self._revlog.storageinfo(
1908 exclusivefiles=exclusivefiles,
1908 exclusivefiles=exclusivefiles,
1909 sharedfiles=sharedfiles,
1909 sharedfiles=sharedfiles,
1910 revisionscount=revisionscount,
1910 revisionscount=revisionscount,
1911 trackedsize=trackedsize,
1911 trackedsize=trackedsize,
1912 storedsize=storedsize,
1912 storedsize=storedsize,
1913 )
1913 )
1914
1914
1915 @property
1915 @property
1916 def opener(self):
1916 def opener(self):
1917 return self._revlog.opener
1917 return self._revlog.opener
1918
1918
1919 @opener.setter
1919 @opener.setter
1920 def opener(self, value):
1920 def opener(self, value):
1921 self._revlog.opener = value
1921 self._revlog.opener = value
1922
1922
1923
1923
1924 @interfaceutil.implementer(repository.imanifestlog)
1924 @interfaceutil.implementer(repository.imanifestlog)
1925 class manifestlog:
1925 class manifestlog:
1926 """A collection class representing the collection of manifest snapshots
1926 """A collection class representing the collection of manifest snapshots
1927 referenced by commits in the repository.
1927 referenced by commits in the repository.
1928
1928
1929 In this situation, 'manifest' refers to the abstract concept of a snapshot
1929 In this situation, 'manifest' refers to the abstract concept of a snapshot
1930 of the list of files in the given commit. Consumers of the output of this
1930 of the list of files in the given commit. Consumers of the output of this
1931 class do not care about the implementation details of the actual manifests
1931 class do not care about the implementation details of the actual manifests
1932 they receive (i.e. tree or flat or lazily loaded, etc)."""
1932 they receive (i.e. tree or flat or lazily loaded, etc)."""
1933
1933
1934 def __init__(self, opener, repo, rootstore, narrowmatch):
1934 def __init__(self, opener, repo, rootstore, narrowmatch):
1935 self.nodeconstants = repo.nodeconstants
1935 self.nodeconstants = repo.nodeconstants
1936 usetreemanifest = False
1936 usetreemanifest = False
1937 cachesize = 4
1937 cachesize = 4
1938
1938
1939 opts = getattr(opener, 'options', None)
1939 opts = getattr(opener, 'options', None)
1940 if opts is not None:
1940 if opts is not None:
1941 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1941 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1942 cachesize = opts.get(b'manifestcachesize', cachesize)
1942 cachesize = opts.get(b'manifestcachesize', cachesize)
1943
1943
1944 self._treemanifests = usetreemanifest
1944 self._treemanifests = usetreemanifest
1945
1945
1946 self._rootstore = rootstore
1946 self._rootstore = rootstore
1947 self._rootstore._setupmanifestcachehooks(repo)
1947 self._rootstore._setupmanifestcachehooks(repo)
1948 self._narrowmatch = narrowmatch
1948 self._narrowmatch = narrowmatch
1949
1949
1950 # A cache of the manifestctx or treemanifestctx for each directory
1950 # A cache of the manifestctx or treemanifestctx for each directory
1951 self._dirmancache = {}
1951 self._dirmancache = {}
1952 self._dirmancache[b''] = util.lrucachedict(cachesize)
1952 self._dirmancache[b''] = util.lrucachedict(cachesize)
1953
1953
1954 self._cachesize = cachesize
1954 self._cachesize = cachesize
1955
1955
1956 def __getitem__(self, node):
1956 def __getitem__(self, node):
1957 """Retrieves the manifest instance for the given node. Throws a
1957 """Retrieves the manifest instance for the given node. Throws a
1958 LookupError if not found.
1958 LookupError if not found.
1959 """
1959 """
1960 return self.get(b'', node)
1960 return self.get(b'', node)
1961
1961
1962 def get(self, tree, node, verify=True):
1962 def get(self, tree, node, verify=True):
1963 """Retrieves the manifest instance for the given node. Throws a
1963 """Retrieves the manifest instance for the given node. Throws a
1964 LookupError if not found.
1964 LookupError if not found.
1965
1965
1966 `verify` - if True, an exception will be thrown if the node is not in
1966 `verify` - if True, an exception will be thrown if the node is not in
1967 the revlog
1967 the revlog
1968 """
1968 """
1969 if node in self._dirmancache.get(tree, ()):
1969 if node in self._dirmancache.get(tree, ()):
1970 return self._dirmancache[tree][node]
1970 return self._dirmancache[tree][node]
1971
1971
1972 if not self._narrowmatch.always():
1972 if not self._narrowmatch.always():
1973 if not self._narrowmatch.visitdir(tree[:-1]):
1973 if not self._narrowmatch.visitdir(tree[:-1]):
1974 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1974 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1975 if tree:
1975 if tree:
1976 if self._rootstore._treeondisk:
1976 if self._rootstore._treeondisk:
1977 if verify:
1977 if verify:
1978 # Side effect: a LookupError is raised if the node
1978 # Side effect: a LookupError is raised if the node
1979 # doesn't exist.
1979 # doesn't exist.
1980 self.getstorage(tree).rev(node)
1980 self.getstorage(tree).rev(node)
1981
1981
1982 m = treemanifestctx(self, tree, node)
1982 m = treemanifestctx(self, tree, node)
1983 else:
1983 else:
1984 raise error.Abort(
1984 raise error.Abort(
1985 _(
1985 _(
1986 b"cannot ask for manifest directory '%s' in a flat "
1986 b"cannot ask for manifest directory '%s' in a flat "
1987 b"manifest"
1987 b"manifest"
1988 )
1988 )
1989 % tree
1989 % tree
1990 )
1990 )
1991 else:
1991 else:
1992 if verify:
1992 if verify:
1993 # Side effect: a LookupError is raised if the node doesn't exist.
1993 # Side effect: a LookupError is raised if the node doesn't exist.
1994 self._rootstore.rev(node)
1994 self._rootstore.rev(node)
1995
1995
1996 if self._treemanifests:
1996 if self._treemanifests:
1997 m = treemanifestctx(self, b'', node)
1997 m = treemanifestctx(self, b'', node)
1998 else:
1998 else:
1999 m = manifestctx(self, node)
1999 m = manifestctx(self, node)
2000
2000
2001 if node != self.nodeconstants.nullid:
2001 if node != self.nodeconstants.nullid:
2002 mancache = self._dirmancache.get(tree)
2002 mancache = self._dirmancache.get(tree)
2003 if not mancache:
2003 if not mancache:
2004 mancache = util.lrucachedict(self._cachesize)
2004 mancache = util.lrucachedict(self._cachesize)
2005 self._dirmancache[tree] = mancache
2005 self._dirmancache[tree] = mancache
2006 mancache[node] = m
2006 mancache[node] = m
2007 return m
2007 return m
2008
2008
2009 def getstorage(self, tree):
2009 def getstorage(self, tree):
2010 return self._rootstore.dirlog(tree)
2010 return self._rootstore.dirlog(tree)
2011
2011
2012 def clearcaches(self, clear_persisted_data=False):
2012 def clearcaches(self, clear_persisted_data=False):
2013 self._dirmancache.clear()
2013 self._dirmancache.clear()
2014 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2014 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2015
2015
2016 def rev(self, node):
2016 def rev(self, node):
2017 return self._rootstore.rev(node)
2017 return self._rootstore.rev(node)
2018
2018
2019 def update_caches(self, transaction):
2019 def update_caches(self, transaction):
2020 return self._rootstore._revlog.update_caches(transaction=transaction)
2020 return self._rootstore._revlog.update_caches(transaction=transaction)
2021
2021
2022
2022
2023 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2023 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2024 class memmanifestctx:
2024 class memmanifestctx:
2025 def __init__(self, manifestlog):
2025 def __init__(self, manifestlog):
2026 self._manifestlog = manifestlog
2026 self._manifestlog = manifestlog
2027 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2027 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2028
2028
2029 def _storage(self):
2029 def _storage(self):
2030 return self._manifestlog.getstorage(b'')
2030 return self._manifestlog.getstorage(b'')
2031
2031
2032 def copy(self):
2032 def copy(self):
2033 memmf = memmanifestctx(self._manifestlog)
2033 memmf = memmanifestctx(self._manifestlog)
2034 memmf._manifestdict = self.read().copy()
2034 memmf._manifestdict = self.read().copy()
2035 return memmf
2035 return memmf
2036
2036
2037 def read(self):
2037 def read(self):
2038 return self._manifestdict
2038 return self._manifestdict
2039
2039
2040 def write(self, transaction, link, p1, p2, added, removed, match=None):
2040 def write(self, transaction, link, p1, p2, added, removed, match=None):
2041 return self._storage().add(
2041 return self._storage().add(
2042 self._manifestdict,
2042 self._manifestdict,
2043 transaction,
2043 transaction,
2044 link,
2044 link,
2045 p1,
2045 p1,
2046 p2,
2046 p2,
2047 added,
2047 added,
2048 removed,
2048 removed,
2049 match=match,
2049 match=match,
2050 )
2050 )
2051
2051
2052
2052
2053 @interfaceutil.implementer(repository.imanifestrevisionstored)
2053 @interfaceutil.implementer(repository.imanifestrevisionstored)
2054 class manifestctx:
2054 class manifestctx:
2055 """A class representing a single revision of a manifest, including its
2055 """A class representing a single revision of a manifest, including its
2056 contents, its parent revs, and its linkrev.
2056 contents, its parent revs, and its linkrev.
2057 """
2057 """
2058
2058
2059 def __init__(self, manifestlog, node):
2059 def __init__(self, manifestlog, node):
2060 self._manifestlog = manifestlog
2060 self._manifestlog = manifestlog
2061 self._data = None
2061 self._data = None
2062
2062
2063 self._node = node
2063 self._node = node
2064
2064
2065 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2065 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2066 # but let's add it later when something needs it and we can load it
2066 # but let's add it later when something needs it and we can load it
2067 # lazily.
2067 # lazily.
2068 # self.p1, self.p2 = store.parents(node)
2068 # self.p1, self.p2 = store.parents(node)
2069 # rev = store.rev(node)
2069 # rev = store.rev(node)
2070 # self.linkrev = store.linkrev(rev)
2070 # self.linkrev = store.linkrev(rev)
2071
2071
2072 def _storage(self):
2072 def _storage(self):
2073 return self._manifestlog.getstorage(b'')
2073 return self._manifestlog.getstorage(b'')
2074
2074
2075 def node(self):
2075 def node(self):
2076 return self._node
2076 return self._node
2077
2077
2078 def copy(self):
2078 def copy(self):
2079 memmf = memmanifestctx(self._manifestlog)
2079 memmf = memmanifestctx(self._manifestlog)
2080 memmf._manifestdict = self.read().copy()
2080 memmf._manifestdict = self.read().copy()
2081 return memmf
2081 return memmf
2082
2082
2083 @propertycache
2083 @propertycache
2084 def parents(self):
2084 def parents(self):
2085 return self._storage().parents(self._node)
2085 return self._storage().parents(self._node)
2086
2086
2087 def read(self):
2087 def read(self):
2088 if self._data is None:
2088 if self._data is None:
2089 nc = self._manifestlog.nodeconstants
2089 nc = self._manifestlog.nodeconstants
2090 if self._node == nc.nullid:
2090 if self._node == nc.nullid:
2091 self._data = manifestdict(nc.nodelen)
2091 self._data = manifestdict(nc.nodelen)
2092 else:
2092 else:
2093 store = self._storage()
2093 store = self._storage()
2094 if self._node in store.fulltextcache:
2094 if self._node in store.fulltextcache:
2095 text = pycompat.bytestr(store.fulltextcache[self._node])
2095 text = pycompat.bytestr(store.fulltextcache[self._node])
2096 else:
2096 else:
2097 text = store.revision(self._node)
2097 text = store.revision(self._node)
2098 arraytext = bytearray(text)
2098 arraytext = bytearray(text)
2099 store.fulltextcache[self._node] = arraytext
2099 store.fulltextcache[self._node] = arraytext
2100 self._data = manifestdict(nc.nodelen, text)
2100 self._data = manifestdict(nc.nodelen, text)
2101 return self._data
2101 return self._data
2102
2102
2103 def readfast(self, shallow=False):
2103 def readfast(self, shallow=False):
2104 """Calls either readdelta or read, based on which would be less work.
2104 """Calls either readdelta or read, based on which would be less work.
2105 readdelta is called if the delta is against the p1, and therefore can be
2105 readdelta is called if the delta is against the p1, and therefore can be
2106 read quickly.
2106 read quickly.
2107
2107
2108 If `shallow` is True, nothing changes since this is a flat manifest.
2108 If `shallow` is True, nothing changes since this is a flat manifest.
2109 """
2109 """
2110 store = self._storage()
2110 store = self._storage()
2111 r = store.rev(self._node)
2111 r = store.rev(self._node)
2112 deltaparent = store.deltaparent(r)
2112 deltaparent = store.deltaparent(r)
2113 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2113 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2114 return self.readdelta()
2114 return self.readdelta()
2115 return self.read()
2115 return self.read()
2116
2116
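The deltaparent test above can be restated as a tiny predicate: the stored delta is cheap to use only when its base revision is one of this revision's parents. A sketch over the same store API:

def delta_is_cheap(store, node, nullrev=-1):
    # True when the stored delta's base is a parent of node's revision,
    # so readdelta() can reuse it without reconstructing other fulltexts.
    r = store.rev(node)
    base = store.deltaparent(r)
    return base != nullrev and base in store.parentrevs(r)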
2117 def readdelta(self, shallow=False):
2117 def readdelta(self, shallow=False):
2118 """Returns a manifest containing just the entries that are present
2118 """Returns a manifest containing just the entries that are present
2119 in this manifest, but not in its p1 manifest. This is efficient to read
2119 in this manifest, but not in its p1 manifest. This is efficient to read
2120 if the revlog delta is already p1.
2120 if the revlog delta is already p1.
2121
2121
2122 Changing the value of `shallow` has no effect on flat manifests.
2122 Changing the value of `shallow` has no effect on flat manifests.
2123 """
2123 """
2124 store = self._storage()
2124 store = self._storage()
2125 r = store.rev(self._node)
2125 r = store.rev(self._node)
2126 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2126 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2127 return manifestdict(store.nodeconstants.nodelen, d)
2127 return manifestdict(store.nodeconstants.nodelen, d)
2128
2128
2129 def find(self, key):
2129 def find(self, key):
2130 return self.read().find(key)
2130 return self.read().find(key)
2131
2131
2132
2132
2133 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2133 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2134 class memtreemanifestctx:
2134 class memtreemanifestctx:
2135 def __init__(self, manifestlog, dir=b''):
2135 def __init__(self, manifestlog, dir=b''):
2136 self._manifestlog = manifestlog
2136 self._manifestlog = manifestlog
2137 self._dir = dir
2137 self._dir = dir
2138 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2138 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2139
2139
2140 def _storage(self):
2140 def _storage(self):
2141 return self._manifestlog.getstorage(b'')
2141 return self._manifestlog.getstorage(b'')
2142
2142
2143 def copy(self):
2143 def copy(self):
2144 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2144 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2145 memmf._treemanifest = self._treemanifest.copy()
2145 memmf._treemanifest = self._treemanifest.copy()
2146 return memmf
2146 return memmf
2147
2147
2148 def read(self):
2148 def read(self):
2149 return self._treemanifest
2149 return self._treemanifest
2150
2150
2151 def write(self, transaction, link, p1, p2, added, removed, match=None):
2151 def write(self, transaction, link, p1, p2, added, removed, match=None):
2152 def readtree(dir, node):
2152 def readtree(dir, node):
2153 return self._manifestlog.get(dir, node).read()
2153 return self._manifestlog.get(dir, node).read()
2154
2154
2155 return self._storage().add(
2155 return self._storage().add(
2156 self._treemanifest,
2156 self._treemanifest,
2157 transaction,
2157 transaction,
2158 link,
2158 link,
2159 p1,
2159 p1,
2160 p2,
2160 p2,
2161 added,
2161 added,
2162 removed,
2162 removed,
2163 readtree=readtree,
2163 readtree=readtree,
2164 match=match,
2164 match=match,
2165 )
2165 )
2166
2166
2167
2167
2168 @interfaceutil.implementer(repository.imanifestrevisionstored)
2168 @interfaceutil.implementer(repository.imanifestrevisionstored)
2169 class treemanifestctx:
2169 class treemanifestctx:
2170 def __init__(self, manifestlog, dir, node):
2170 def __init__(self, manifestlog, dir, node):
2171 self._manifestlog = manifestlog
2171 self._manifestlog = manifestlog
2172 self._dir = dir
2172 self._dir = dir
2173 self._data = None
2173 self._data = None
2174
2174
2175 self._node = node
2175 self._node = node
2176
2176
2177 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2177 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2178 # we can instantiate treemanifestctx objects for directories we don't
2178 # we can instantiate treemanifestctx objects for directories we don't
2179 # have on disk.
2179 # have on disk.
2180 # self.p1, self.p2 = store.parents(node)
2180 # self.p1, self.p2 = store.parents(node)
2181 # rev = store.rev(node)
2181 # rev = store.rev(node)
2182 # self.linkrev = store.linkrev(rev)
2182 # self.linkrev = store.linkrev(rev)
2183
2183
2184 def _storage(self):
2184 def _storage(self):
2185 narrowmatch = self._manifestlog._narrowmatch
2185 narrowmatch = self._manifestlog._narrowmatch
2186 if not narrowmatch.always():
2186 if not narrowmatch.always():
2187 if not narrowmatch.visitdir(self._dir[:-1]):
2187 if not narrowmatch.visitdir(self._dir[:-1]):
2188 return excludedmanifestrevlog(
2188 return excludedmanifestrevlog(
2189 self._manifestlog.nodeconstants, self._dir
2189 self._manifestlog.nodeconstants, self._dir
2190 )
2190 )
2191 return self._manifestlog.getstorage(self._dir)
2191 return self._manifestlog.getstorage(self._dir)
2192
2192
2193 def read(self):
2193 def read(self):
2194 if self._data is None:
2194 if self._data is None:
2195 store = self._storage()
2195 store = self._storage()
2196 if self._node == self._manifestlog.nodeconstants.nullid:
2196 if self._node == self._manifestlog.nodeconstants.nullid:
2197 self._data = treemanifest(self._manifestlog.nodeconstants)
2197 self._data = treemanifest(self._manifestlog.nodeconstants)
2198 # TODO accessing non-public API
2198 # TODO accessing non-public API
2199 elif store._treeondisk:
2199 elif store._treeondisk:
2200 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2200 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2201
2201
2202 def gettext():
2202 def gettext():
2203 return store.revision(self._node)
2203 return store.revision(self._node)
2204
2204
2205 def readsubtree(dir, subm):
2205 def readsubtree(dir, subm):
2206 # Set verify to False since we need to be able to create
2206 # Set verify to False since we need to be able to create
2207 # subtrees for trees that don't exist on disk.
2207 # subtrees for trees that don't exist on disk.
2208 return self._manifestlog.get(dir, subm, verify=False).read()
2208 return self._manifestlog.get(dir, subm, verify=False).read()
2209
2209
2210 m.read(gettext, readsubtree)
2210 m.read(gettext, readsubtree)
2211 m.setnode(self._node)
2211 m.setnode(self._node)
2212 self._data = m
2212 self._data = m
2213 else:
2213 else:
2214 if self._node in store.fulltextcache:
2214 if self._node in store.fulltextcache:
2215 text = pycompat.bytestr(store.fulltextcache[self._node])
2215 text = pycompat.bytestr(store.fulltextcache[self._node])
2216 else:
2216 else:
2217 text = store.revision(self._node)
2217 text = store.revision(self._node)
2218 arraytext = bytearray(text)
2218 arraytext = bytearray(text)
2219 store.fulltextcache[self._node] = arraytext
2219 store.fulltextcache[self._node] = arraytext
2220 self._data = treemanifest(
2220 self._data = treemanifest(
2221 self._manifestlog.nodeconstants, dir=self._dir, text=text
2221 self._manifestlog.nodeconstants, dir=self._dir, text=text
2222 )
2222 )
2223
2223
2224 return self._data
2224 return self._data
2225
2225
2226 def node(self):
2226 def node(self):
2227 return self._node
2227 return self._node
2228
2228
2229 def copy(self):
2229 def copy(self):
2230 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2230 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2231 memmf._treemanifest = self.read().copy()
2231 memmf._treemanifest = self.read().copy()
2232 return memmf
2232 return memmf
2233
2233
2234 @propertycache
2234 @propertycache
2235 def parents(self):
2235 def parents(self):
2236 return self._storage().parents(self._node)
2236 return self._storage().parents(self._node)
2237
2237
2238 def readdelta(self, shallow=False):
2238 def readdelta(self, shallow=False):
2239 """Returns a manifest containing just the entries that are present
2239 """Returns a manifest containing just the entries that are present
2240 in this manifest, but not in its p1 manifest. This is efficient to read
2240 in this manifest, but not in its p1 manifest. This is efficient to read
2241 if the revlog delta is already p1.
2241 if the revlog delta is already p1.
2242
2242
2243 If `shallow` is True, this will read the delta for this directory,
2243 If `shallow` is True, this will read the delta for this directory,
2244 without recursively reading subdirectory manifests. Instead, any
2244 without recursively reading subdirectory manifests. Instead, any
2245 subdirectory entry will be reported as it appears in the manifest, i.e.
2245 subdirectory entry will be reported as it appears in the manifest, i.e.
2246 the subdirectory will be reported among files and distinguished only by
2246 the subdirectory will be reported among files and distinguished only by
2247 its 't' flag.
2247 its 't' flag.
2248 """
2248 """
2249 store = self._storage()
2249 store = self._storage()
2250 if shallow:
2250 if shallow:
2251 r = store.rev(self._node)
2251 r = store.rev(self._node)
2252 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2252 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2253 return manifestdict(store.nodeconstants.nodelen, d)
2253 return manifestdict(store.nodeconstants.nodelen, d)
2254 else:
2254 else:
2255 # Need to perform a slow delta
2255 # Need to perform a slow delta
2256 r0 = store.deltaparent(store.rev(self._node))
2256 r0 = store.deltaparent(store.rev(self._node))
2257 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2257 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2258 m1 = self.read()
2258 m1 = self.read()
2259 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2259 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2260 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2260 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2261 if n1:
2261 if n1:
2262 md[f] = n1
2262 md[f] = n1
2263 if fl1:
2263 if fl1:
2264 md.setflag(f, fl1)
2264 md.setflag(f, fl1)
2265 return md
2265 return md
2266
2266
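The slow path above folds a full manifest diff into a delta-like manifest. As a reading aid, diff() maps each differing path to a pair of (node, flags) tuples, with (None, b'') on the side where the path is absent; a plain-dict sketch of the same fold, using made-up nodes:

d = {
    b'a.txt': ((b'\x11' * 20, b''), (b'\x22' * 20, b'x')),  # changed, flag set
    b'b.txt': ((b'\x33' * 20, b''), (None, b'')),           # removed in m1
}
md, flags = {}, {}
for f, ((n0, fl0), (n1, fl1)) in d.items():
    if n1:                       # keep only entries present on the m1 side
        md[f] = n1
        if fl1:
            flags[f] = fl1
assert md == {b'a.txt': b'\x22' * 20} and flags == {b'a.txt': b'x'}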
2267 def readfast(self, shallow=False):
2267 def readfast(self, shallow=False):
2268 """Calls either readdelta or read, based on which would be less work.
2268 """Calls either readdelta or read, based on which would be less work.
2269 readdelta is called if the delta is against the p1, and therefore can be
2269 readdelta is called if the delta is against the p1, and therefore can be
2270 read quickly.
2270 read quickly.
2271
2271
2272 If `shallow` is True, it only returns the entries from this manifest,
2272 If `shallow` is True, it only returns the entries from this manifest,
2273 and not any submanifests.
2273 and not any submanifests.
2274 """
2274 """
2275 store = self._storage()
2275 store = self._storage()
2276 r = store.rev(self._node)
2276 r = store.rev(self._node)
2277 deltaparent = store.deltaparent(r)
2277 deltaparent = store.deltaparent(r)
2278 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2278 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2279 return self.readdelta(shallow=shallow)
2279 return self.readdelta(shallow=shallow)
2280
2280
2281 if shallow:
2281 if shallow:
2282 return manifestdict(
2282 return manifestdict(
2283 store.nodeconstants.nodelen, store.revision(self._node)
2283 store.nodeconstants.nodelen, store.revision(self._node)
2284 )
2284 )
2285 else:
2285 else:
2286 return self.read()
2286 return self.read()
2287
2287
2288 def find(self, key):
2288 def find(self, key):
2289 return self.read().find(key)
2289 return self.read().find(key)
2290
2290
2291
2291
2292 class excludeddir(treemanifest):
2292 class excludeddir(treemanifest):
2293 """Stand-in for a directory that is excluded from the repository.
2293 """Stand-in for a directory that is excluded from the repository.
2294
2294
2295 With narrowing active on a repository that uses treemanifests,
2295 With narrowing active on a repository that uses treemanifests,
2296 some of the directory revlogs will be excluded from the resulting
2296 some of the directory revlogs will be excluded from the resulting
2297 clone. This is a huge storage win for clients, but means we need
2297 clone. This is a huge storage win for clients, but means we need
2298 some sort of pseudo-manifest to surface to internals so we can
2298 some sort of pseudo-manifest to surface to internals so we can
2299 detect a merge conflict outside the narrowspec. That's what this
    detect a merge conflict outside the narrowspec. That's what this
    class is: it stands in for a directory whose node is known, but
    whose contents are unknown.
    """

    def __init__(self, nodeconstants, dir, node):
        super(excludeddir, self).__init__(nodeconstants, dir)
        self._node = node
        # Add an empty file, which will be included by iterators and such,
        # appearing as the directory itself (i.e. something like "dir/")
        self._files[b''] = node
        self._flags[b''] = b't'

    # Manifests outside the narrowspec should never be modified, so avoid
    # copying. This makes a noticeable difference when there are very many
    # directories outside the narrowspec. Also, it makes sense for the copy to
    # be of the same type as the original, which would not happen with the
    # super type's copy().
    def copy(self):
        return self


class excludeddirmanifestctx(treemanifestctx):
    """context wrapper for excludeddir - see that docstring for rationale"""

    def __init__(self, nodeconstants, dir, node):
        self.nodeconstants = nodeconstants
        self._dir = dir
        self._node = node

    def read(self):
        return excludeddir(self.nodeconstants, self._dir, self._node)

    def readfast(self, shallow=False):
        # special version of readfast since we don't have underlying storage
        return self.read()

    def write(self, *args):
        raise error.ProgrammingError(
            b'attempt to write manifest from excluded dir %s' % self._dir
        )


class excludedmanifestrevlog(manifestrevlog):
    """Stand-in for excluded treemanifest revlogs.

    When narrowing is active on a treemanifest repository, we'll have
    references to directories we can't see due to the revlog being
    skipped. This class exists to conform to the manifestrevlog
    interface for those directories and proactively prevent writes to
    outside the narrowspec.
    """

    def __init__(self, nodeconstants, dir):
        self.nodeconstants = nodeconstants
        self._dir = dir

    def __len__(self):
        raise error.ProgrammingError(
            b'attempt to get length of excluded dir %s' % self._dir
        )

    def rev(self, node):
        raise error.ProgrammingError(
            b'attempt to get rev from excluded dir %s' % self._dir
        )

    def linkrev(self, node):
        raise error.ProgrammingError(
            b'attempt to get linkrev from excluded dir %s' % self._dir
        )

    def node(self, rev):
        raise error.ProgrammingError(
            b'attempt to get node from excluded dir %s' % self._dir
        )

    def add(self, *args, **kwargs):
        # We should never write entries in dirlogs outside the narrow clone.
        # However, the method still gets called from writesubtree() in
        # _addtree(), so we need to handle it. We should possibly make that
        # avoid calling add() with a clean manifest (_dirty is always False
        # in excludeddir instances).
        pass
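
For orientation: every method above either no-ops or raises, so the excluded* classes satisfy the manifest interfaces while guaranteeing that nothing outside the narrowspec is read or written. A minimal standalone sketch of that guard pattern (the readonlystub name and message below are illustrative, not part of Mercurial):

    class readonlystub:
        """Stand-in that knows an identifier but refuses real access."""

        def __init__(self, ident):
            self._ident = ident

        def __len__(self):
            # reads are programming errors, mirroring excludedmanifestrevlog
            raise RuntimeError('attempt to get length of excluded %s' % self._ident)

        def add(self, *args, **kwargs):
            # writes are silently ignored, mirroring excludedmanifestrevlog.add()
            pass
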
@@ -1,3549 +1,3546 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

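For context: a flag processor is a (read, write, raw) triple like ellipsisprocessor above. A minimal sketch of another processor in the same shape (the noop* names are illustrative only, and the returned booleans simply mirror the ellipsis processors above):

    def noopreadprocessor(rl, text):
        return text, False

    def noopwriteprocessor(rl, text):
        return text, False

    def nooprawprocessor(rl, text):
        return False

    noopprocessor = (
        noopreadprocessor,
        noopwriteprocessor,
        nooprawprocessor,
    )
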
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)

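A sketch of how a verify pass might drive this hook (``rl`` and ``nodes`` are assumed to be an existing revlog instance and an iterable of its node ids; the b'skipread' set mirrors the code above):

    state = {b'skipread': set()}
    for node in nodes:
        # skipflags=0 takes the full read-and-verify branch
        _verify_revision(rl, 0, state, node)
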
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

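Both classes above are plain attrs value carriers. For illustration, a verification pass could report an issue like this (the message and the ``node`` value are made up for the example):

    problem = revlogproblem(
        error=b'example: revision data unreadable',
        node=node,  # assumes a node id from the revlog being checked
    )
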

def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache

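All three parsers share the same shape: raw index bytes in, an (index, cache) pair out. A quick usage sketch (``data`` is assumed to hold the bytes of a version 1 ``.i`` file):

    index, cache = parse_index_v1(data, inline)
    # entries are fixed-size records; offset 4 of each tuple is the linkrev,
    # as used by get_streams() further down
    linkrev = index[len(index) - 1][4]
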
if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'

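One way a lookup table like this gets used is to decide whether a partial identifier could be a hex node prefix. A sketch (the ``prefix`` value is an arbitrary example):

    prefix = b'14de1582'
    is_hex_prefix = all(c in hexdigits for c in pycompat.iterbytestr(prefix))
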
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must be reliably set by normal code, but
        that test, debug, or performance measurement code might not set it to
        an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes-to-bytes copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

          [
              (filename, bytes_stream, stream_size),
              …
          ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, for example by having a
        # way to pre-open the files we feed to the revlog and never closing
        # them before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update on disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
950 # parsers.c radix tree lookup failed
951 if (
951 if (
952 node == self.nodeconstants.wdirid
952 node == self.nodeconstants.wdirid
953 or node in self.nodeconstants.wdirfilenodeids
953 or node in self.nodeconstants.wdirfilenodeids
954 ):
954 ):
955 raise error.WdirUnsupported
955 raise error.WdirUnsupported
956 raise error.LookupError(node, self.display_id, _(b'no node'))
956 raise error.LookupError(node, self.display_id, _(b'no node'))
957
957
958 # Accessors for index entries.
958 # Accessors for index entries.
959
959
960 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
960 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
961 # are flags.
961 # are flags.
962 def start(self, rev):
962 def start(self, rev):
963 return int(self.index[rev][0] >> 16)
963 return int(self.index[rev][0] >> 16)
964
964
965 def sidedata_cut_off(self, rev):
965 def sidedata_cut_off(self, rev):
966 sd_cut_off = self.index[rev][8]
966 sd_cut_off = self.index[rev][8]
967 if sd_cut_off != 0:
967 if sd_cut_off != 0:
968 return sd_cut_off
968 return sd_cut_off
969 # This is some annoying dance, because entries without sidedata
969 # This is some annoying dance, because entries without sidedata
970 # currently use 0 as their ofsset. (instead of previous-offset +
970 # currently use 0 as their ofsset. (instead of previous-offset +
971 # previous-size)
971 # previous-size)
972 #
972 #
973 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
973 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
974 # In the meantime, we need this.
974 # In the meantime, we need this.
975 while 0 <= rev:
975 while 0 <= rev:
976 e = self.index[rev]
976 e = self.index[rev]
977 if e[9] != 0:
977 if e[9] != 0:
978 return e[8] + e[9]
978 return e[8] + e[9]
979 rev -= 1
979 rev -= 1
980 return 0
980 return 0
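
    # Worked example (hypothetical index values, not from the source): if
    # rev 1 stores sidedata at offset 100 with length 20 (e[8] == 100,
    # e[9] == 20) and rev 2 stores none (e[8] == e[9] == 0), then
    # sidedata_cut_off(2) walks back to rev 1 and returns 100 + 20 == 120.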

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank
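
    # Worked example (hypothetical graph, for illustration only): with a
    # root 0, children 1 and 3, and a merge 2 of 1 and 3,
    # ancestors(2) == {0, 1, 3, 2}, so a changelog-v2 revlog that persists
    # ranks would report fast_rank(2) == 4.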

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
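
    # Illustrative sketch (hypothetical sizes): with generaldelta, if rev 2
    # deltas against rev 1, rev 1 against full-text base rev 0, and the
    # stored lengths e[1] are 30, 20 and 100 bytes respectively, then
    # _chaininfo(2) == (2, 150): two deltas in the chain, plus the base
    # text's stored length, since decompressing the base also costs work.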

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
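
    # Usage sketch (hypothetical chain, for illustration only): if rev 2
    # deltas against rev 1 and rev 1 against full-text base rev 0, then
    # self._deltachain(2) == ([0, 1, 2], False) and
    # self._deltachain(2, stoprev=1) == ([2], True).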

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
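
    # Revset sketch (hypothetical nodes, for illustration only): in a
    # linear history 0-1-2-3, common=[node(1)] and heads=[node(3)] give a
    # 'has' set covering {nullrev, 0, 1} (i.e. ::1, computed lazily) and
    # missing == [node(2), node(3)] (i.e. (::3) - (::1)), in topological
    # order.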

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
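
    # Shape of the result (hypothetical linear history, for illustration):
    # with nodes n0-n1-n2-n3, nodesbetween([n1], [n3]) returns
    # ([n1, n2, n3], [n1], [n3]): the topological path, the reachable
    # roots, and the reachable heads.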

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]
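
    # Usage sketch (hypothetical nodes, for illustration only): in a
    # history where n1 and n2 are both children of root n0, heads()
    # returns the childless nodes [n1, n2], while heads(start=n1) returns
    # only [n1], the heads descending from n1.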

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )
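
    # Note on the fast paths above (illustrative): revision numbers are
    # topologically ordered, so an ancestor can never have a higher rev
    # than its descendant; isancestorrev(5, 3) is therefore False without
    # any graph walk, and only the a < b case needs reachableroots.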

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
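
    # Usage sketch (hypothetical hashes, for illustration only): if two
    # nodes hash to 1a2b... and 1f3c..., then shortest() on the second
    # returns b'1f': the prefix b'1' is ambiguous, b'1f' is not, and
    # neither could be confused with the all-'f' wdir id.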

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node
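
    # Background on the comparison (a sketch, assuming the usual revlog
    # hashing scheme): storageutil.hashrevisionsha1 recomputes the node as
    # sha1(min(p1, p2) + max(p1, p2) + text), so cmp() can report a change
    # without reading or decompressing the stored revision text.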

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)
1807
1807
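In an inline revlog the index entries and the data chunks live interleaved in a single file, which is why the code above shifts both offsets by one entry size per preceding revision. A small arithmetic sketch of that adjustment (illustrative, not the real API):

def physical_window(start, end, startrev, endrev, entry_size, inline):
    # start/end are logical offsets into the conceptual data stream;
    # for inline revlogs, revision r's data is preceded by the index
    # entries of revisions 0..r, hence the (rev + 1) factor
    if inline:
        start += (startrev + 1) * entry_size
        end += (endrev + 1) * entry_size
    return start, end - start  # (offset, length) to actually read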
1808 def _chunk(self, rev, df=None):
1808 def _chunk(self, rev, df=None):
1809 """Obtain a single decompressed chunk for a revision.
1809 """Obtain a single decompressed chunk for a revision.
1810
1810
1811 Accepts an integer revision and an optional already-open file handle
1811 Accepts an integer revision and an optional already-open file handle
1812 to be used for reading. If used, the seek position of the file will not
1812 to be used for reading. If used, the seek position of the file will not
1813 be preserved.
1813 be preserved.
1814
1814
1815 Returns a str holding uncompressed data for the requested revision.
1815 Returns a str holding uncompressed data for the requested revision.
1816 """
1816 """
1817 compression_mode = self.index[rev][10]
1817 compression_mode = self.index[rev][10]
1818 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1818 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1819 if compression_mode == COMP_MODE_PLAIN:
1819 if compression_mode == COMP_MODE_PLAIN:
1820 return data
1820 return data
1821 elif compression_mode == COMP_MODE_DEFAULT:
1821 elif compression_mode == COMP_MODE_DEFAULT:
1822 return self._decompressor(data)
1822 return self._decompressor(data)
1823 elif compression_mode == COMP_MODE_INLINE:
1823 elif compression_mode == COMP_MODE_INLINE:
1824 return self.decompress(data)
1824 return self.decompress(data)
1825 else:
1825 else:
1826 msg = b'unknown compression mode %d'
1826 msg = b'unknown compression mode %d'
1827 msg %= compression_mode
1827 msg %= compression_mode
1828 raise error.RevlogError(msg)
1828 raise error.RevlogError(msg)
1829
1829
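Each index entry records how its chunk was compressed (field 10 above), so decoding is a three-way dispatch. A sketch with placeholder constants; the names mirror the COMP_MODE_* idea but the numeric values here are assumptions for illustration:

COMP_MODE_PLAIN = 0    # stored as-is, no header
COMP_MODE_DEFAULT = 1  # compressed with the revlog's default engine
COMP_MODE_INLINE = 2   # historical format: header byte inside the chunk

def decode_chunk(mode, data, default_decomp, inline_decomp):
    if mode == COMP_MODE_PLAIN:
        return data
    if mode == COMP_MODE_DEFAULT:
        return default_decomp(data)
    if mode == COMP_MODE_INLINE:
        return inline_decomp(data)
    raise ValueError('unknown compression mode %d' % mode)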
1830 def _chunks(self, revs, df=None, targetsize=None):
1830 def _chunks(self, revs, df=None, targetsize=None):
1831 """Obtain decompressed chunks for the specified revisions.
1831 """Obtain decompressed chunks for the specified revisions.
1832
1832
1833 Accepts an iterable of numeric revisions that are assumed to be in
1833 Accepts an iterable of numeric revisions that are assumed to be in
1834 ascending order. Also accepts an optional already-open file handle
1834 ascending order. Also accepts an optional already-open file handle
1835 to be used for reading. If used, the seek position of the file will
1835 to be used for reading. If used, the seek position of the file will
1836 not be preserved.
1836 not be preserved.
1837
1837
1838 This function is similar to calling ``self._chunk()`` multiple times,
1838 This function is similar to calling ``self._chunk()`` multiple times,
1839 but is faster.
1839 but is faster.
1840
1840
1841 Returns a list with decompressed data for each requested revision.
1841 Returns a list with decompressed data for each requested revision.
1842 """
1842 """
1843 if not revs:
1843 if not revs:
1844 return []
1844 return []
1845 start = self.start
1845 start = self.start
1846 length = self.length
1846 length = self.length
1847 inline = self._inline
1847 inline = self._inline
1848 iosize = self.index.entry_size
1848 iosize = self.index.entry_size
1849 buffer = util.buffer
1849 buffer = util.buffer
1850
1850
1851 l = []
1851 l = []
1852 ladd = l.append
1852 ladd = l.append
1853
1853
1854 if not self._withsparseread:
1854 if not self._withsparseread:
1855 slicedchunks = (revs,)
1855 slicedchunks = (revs,)
1856 else:
1856 else:
1857 slicedchunks = deltautil.slicechunk(
1857 slicedchunks = deltautil.slicechunk(
1858 self, revs, targetsize=targetsize
1858 self, revs, targetsize=targetsize
1859 )
1859 )
1860
1860
1861 for revschunk in slicedchunks:
1861 for revschunk in slicedchunks:
1862 firstrev = revschunk[0]
1862 firstrev = revschunk[0]
1863 # Skip trailing revisions with empty diff
1863 # Skip trailing revisions with empty diff
1864 for lastrev in revschunk[::-1]:
1864 for lastrev in revschunk[::-1]:
1865 if length(lastrev) != 0:
1865 if length(lastrev) != 0:
1866 break
1866 break
1867
1867
1868 try:
1868 try:
1869 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1869 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1870 except OverflowError:
1870 except OverflowError:
1871 # issue4215 - we can't cache a run of chunks greater than
1871 # issue4215 - we can't cache a run of chunks greater than
1872 # 2G on Windows
1872 # 2G on Windows
1873 return [self._chunk(rev, df=df) for rev in revschunk]
1873 return [self._chunk(rev, df=df) for rev in revschunk]
1874
1874
1875 decomp = self.decompress
1875 decomp = self.decompress
1876 # self._decompressor might be None, but will not be used in that case
1876 # self._decompressor might be None, but will not be used in that case
1877 def_decomp = self._decompressor
1877 def_decomp = self._decompressor
1878 for rev in revschunk:
1878 for rev in revschunk:
1879 chunkstart = start(rev)
1879 chunkstart = start(rev)
1880 if inline:
1880 if inline:
1881 chunkstart += (rev + 1) * iosize
1881 chunkstart += (rev + 1) * iosize
1882 chunklength = length(rev)
1882 chunklength = length(rev)
1883 comp_mode = self.index[rev][10]
1883 comp_mode = self.index[rev][10]
1884 c = buffer(data, chunkstart - offset, chunklength)
1884 c = buffer(data, chunkstart - offset, chunklength)
1885 if comp_mode == COMP_MODE_PLAIN:
1885 if comp_mode == COMP_MODE_PLAIN:
1886 ladd(c)
1886 ladd(c)
1887 elif comp_mode == COMP_MODE_INLINE:
1887 elif comp_mode == COMP_MODE_INLINE:
1888 ladd(decomp(c))
1888 ladd(decomp(c))
1889 elif comp_mode == COMP_MODE_DEFAULT:
1889 elif comp_mode == COMP_MODE_DEFAULT:
1890 ladd(def_decomp(c))
1890 ladd(def_decomp(c))
1891 else:
1891 else:
1892 msg = b'unknown compression mode %d'
1892 msg = b'unknown compression mode %d'
1893 msg %= comp_mode
1893 msg %= comp_mode
1894 raise error.RevlogError(msg)
1894 raise error.RevlogError(msg)
1895
1895
1896 return l
1896 return l
1897
1897
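The speedup over repeated ``_chunk()`` calls comes from reading one contiguous segment per slice, then cutting each revision's chunk out of the in-memory buffer without copying. A standalone sketch of that slicing step, using memoryview where the real code uses util.buffer (illustrative):

def cut_chunks(segment, segment_offset, revs, start, length):
    # segment: one bytes object covering every requested chunk
    # start(rev)/length(rev): logical position and size of each chunk
    view = memoryview(segment)
    out = []
    for rev in revs:
        lo = start(rev) - segment_offset
        out.append(view[lo:lo + length(rev)])  # zero-copy slice
    return out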
1898 def deltaparent(self, rev):
1898 def deltaparent(self, rev):
1899 """return deltaparent of the given revision"""
1899 """return deltaparent of the given revision"""
1900 base = self.index[rev][3]
1900 base = self.index[rev][3]
1901 if base == rev:
1901 if base == rev:
1902 return nullrev
1902 return nullrev
1903 elif self._generaldelta:
1903 elif self._generaldelta:
1904 return base
1904 return base
1905 else:
1905 else:
1906 return rev - 1
1906 return rev - 1
1907
1907
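The three branches above encode the two on-disk layouts: with general delta the base revision is stored per entry, while the legacy layout always deltas against the immediately preceding revision; ``base == rev`` marks a full snapshot in both. A compact restatement (illustrative):

NULLREV = -1

def delta_parent(rev, base, generaldelta):
    if base == rev:       # full text stored; nothing to apply it to
        return NULLREV
    if generaldelta:      # base recorded explicitly in the index entry
        return base
    return rev - 1        # legacy: implicit chain through rev - 1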
1908 def issnapshot(self, rev):
1908 def issnapshot(self, rev):
1909 """tells whether rev is a snapshot"""
1909 """tells whether rev is a snapshot"""
1910 if not self._sparserevlog:
1910 if not self._sparserevlog:
1911 return self.deltaparent(rev) == nullrev
1911 return self.deltaparent(rev) == nullrev
1912 elif hasattr(self.index, 'issnapshot'):
1912 elif hasattr(self.index, 'issnapshot'):
1913 # assign the method directly to cache the attribute test and access
1913 # assign the method directly to cache the attribute test and access
1914 self.issnapshot = self.index.issnapshot
1914 self.issnapshot = self.index.issnapshot
1915 return self.issnapshot(rev)
1915 return self.issnapshot(rev)
1916 if rev == nullrev:
1916 if rev == nullrev:
1917 return True
1917 return True
1918 entry = self.index[rev]
1918 entry = self.index[rev]
1919 base = entry[3]
1919 base = entry[3]
1920 if base == rev:
1920 if base == rev:
1921 return True
1921 return True
1922 if base == nullrev:
1922 if base == nullrev:
1923 return True
1923 return True
1924 p1 = entry[5]
1924 p1 = entry[5]
1925 while self.length(p1) == 0:
1925 while self.length(p1) == 0:
1926 b = self.deltaparent(p1)
1926 b = self.deltaparent(p1)
1927 if b == p1:
1927 if b == p1:
1928 break
1928 break
1929 p1 = b
1929 p1 = b
1930 p2 = entry[6]
1930 p2 = entry[6]
1931 while self.length(p2) == 0:
1931 while self.length(p2) == 0:
1932 b = self.deltaparent(p2)
1932 b = self.deltaparent(p2)
1933 if b == p2:
1933 if b == p2:
1934 break
1934 break
1935 p2 = b
1935 p2 = b
1936 if base == p1 or base == p2:
1936 if base == p1 or base == p2:
1937 return False
1937 return False
1938 return self.issnapshot(base)
1938 return self.issnapshot(base)
1939
1939
1940 def snapshotdepth(self, rev):
1940 def snapshotdepth(self, rev):
1941 """number of snapshot in the chain before this one"""
1941 """number of snapshot in the chain before this one"""
1942 if not self.issnapshot(rev):
1942 if not self.issnapshot(rev):
1943 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1943 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1944 return len(self._deltachain(rev)[0]) - 1
1944 return len(self._deltachain(rev)[0]) - 1
1945
1945
1946 def revdiff(self, rev1, rev2):
1946 def revdiff(self, rev1, rev2):
1947 """return or calculate a delta between two revisions
1947 """return or calculate a delta between two revisions
1948
1948
1949 The delta calculated is in binary form and is intended to be written to
1949 The delta calculated is in binary form and is intended to be written to
1950 revlog data directly. So this function needs raw revision data.
1950 revlog data directly. So this function needs raw revision data.
1951 """
1951 """
1952 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1952 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1953 return bytes(self._chunk(rev2))
1953 return bytes(self._chunk(rev2))
1954
1954
1955 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1955 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1956
1956
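The fast path matters because when ``rev1`` is already ``rev2``'s delta parent, the stored chunk *is* the requested delta and no text is rebuilt. Either way the method guarantees the round-trip below, sketched with an mdiff.patches-like ``patches(base, deltas)`` callable; the usage shape is illustrative:

def check_revdiff_roundtrip(rl, rev1, rev2, patches):
    # applying the returned delta to rev1's raw text must
    # reproduce rev2's raw text exactly
    delta = rl.revdiff(rev1, rev2)
    assert patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)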
1957 def revision(self, nodeorrev):
1957 def revision(self, nodeorrev):
1958 """return an uncompressed revision of a given node or revision
1958 """return an uncompressed revision of a given node or revision
1959 number.
1959 number.
1960 """
1960 """
1961 return self._revisiondata(nodeorrev)
1961 return self._revisiondata(nodeorrev)
1962
1962
1963 def sidedata(self, nodeorrev, _df=None):
1963 def sidedata(self, nodeorrev, _df=None):
1964 """a map of extra data related to the changeset but not part of the hash
1964 """a map of extra data related to the changeset but not part of the hash
1965
1965
1966 This function currently returns a dictionary. However, a more
1966 This function currently returns a dictionary. However, a more
1967 advanced mapping object will likely be used in the future for more
1967 advanced mapping object will likely be used in the future for more
1968 efficient/lazy code.
1968 efficient/lazy code.
1969 """
1969 """
1970 # deal with <nodeorrev> argument type
1970 # deal with <nodeorrev> argument type
1971 if isinstance(nodeorrev, int):
1971 if isinstance(nodeorrev, int):
1972 rev = nodeorrev
1972 rev = nodeorrev
1973 else:
1973 else:
1974 rev = self.rev(nodeorrev)
1974 rev = self.rev(nodeorrev)
1975 return self._sidedata(rev)
1975 return self._sidedata(rev)
1976
1976
1977 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1977 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1978 # deal with <nodeorrev> argument type
1978 # deal with <nodeorrev> argument type
1979 if isinstance(nodeorrev, int):
1979 if isinstance(nodeorrev, int):
1980 rev = nodeorrev
1980 rev = nodeorrev
1981 node = self.node(rev)
1981 node = self.node(rev)
1982 else:
1982 else:
1983 node = nodeorrev
1983 node = nodeorrev
1984 rev = None
1984 rev = None
1985
1985
1986 # fast path the special `nullid` rev
1986 # fast path the special `nullid` rev
1987 if node == self.nullid:
1987 if node == self.nullid:
1988 return b""
1988 return b""
1989
1989
1990 # ``rawtext`` is the text as stored inside the revlog. Might be the
1990 # ``rawtext`` is the text as stored inside the revlog. Might be the
1991 # revision or might need to be processed to retrieve the revision.
1991 # revision or might need to be processed to retrieve the revision.
1992 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1992 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1993
1993
1994 if raw and validated:
1994 if raw and validated:
1995 # if we don't want to process the raw text and the raw
1995 # if we don't want to process the raw text and the raw
1996 # text is already cached, we can exit early.
1996 # text is already cached, we can exit early.
1997 return rawtext
1997 return rawtext
1998 if rev is None:
1998 if rev is None:
1999 rev = self.rev(node)
1999 rev = self.rev(node)
2000 # the revlog's flags for this revision
2000 # the revlog's flags for this revision
2001 # (they usually alter its state or content)
2001 # (they usually alter its state or content)
2002 flags = self.flags(rev)
2002 flags = self.flags(rev)
2003
2003
2004 if validated and flags == REVIDX_DEFAULT_FLAGS:
2004 if validated and flags == REVIDX_DEFAULT_FLAGS:
2005 # no extra flags set, no flag processor runs, text = rawtext
2005 # no extra flags set, no flag processor runs, text = rawtext
2006 return rawtext
2006 return rawtext
2007
2007
2008 if raw:
2008 if raw:
2009 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2009 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2010 text = rawtext
2010 text = rawtext
2011 else:
2011 else:
2012 r = flagutil.processflagsread(self, rawtext, flags)
2012 r = flagutil.processflagsread(self, rawtext, flags)
2013 text, validatehash = r
2013 text, validatehash = r
2014 if validatehash:
2014 if validatehash:
2015 self.checkhash(text, node, rev=rev)
2015 self.checkhash(text, node, rev=rev)
2016 if not validated:
2016 if not validated:
2017 self._revisioncache = (node, rev, rawtext)
2017 self._revisioncache = (node, rev, rawtext)
2018
2018
2019 return text
2019 return text
2020
2020
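The method keeps two notions of a revision apart: ``rawtext`` (the bytes as stored) and ``text`` (after read-time flag processors, e.g. censoring, have run). ``raw=True`` skips the transformation and only lets processors request hash validation. A sketch of that final decision, with the processor callables passed in (illustrative):

def finalize_text(rawtext, raw, process_read, process_raw):
    if raw:
        # keep stored bytes; processors may still demand a hash check
        validate = process_raw(rawtext)
        return rawtext, validate
    # normal read: processors may rewrite the text entirely
    text, validate = process_read(rawtext)
    return text, validate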
2021 def _rawtext(self, node, rev, _df=None):
2021 def _rawtext(self, node, rev, _df=None):
2022 """return the possibly unvalidated rawtext for a revision
2022 """return the possibly unvalidated rawtext for a revision
2023
2023
2024 returns (rev, rawtext, validated)
2024 returns (rev, rawtext, validated)
2025 """
2025 """
2026
2026
2027 # revision in the cache (could be useful to apply delta)
2027 # revision in the cache (could be useful to apply delta)
2028 cachedrev = None
2028 cachedrev = None
2029 # An intermediate text to apply deltas to
2029 # An intermediate text to apply deltas to
2030 basetext = None
2030 basetext = None
2031
2031
2032 # Check if we have the entry in cache
2032 # Check if we have the entry in cache
2033 # The cache entry looks like (node, rev, rawtext)
2033 # The cache entry looks like (node, rev, rawtext)
2034 if self._revisioncache:
2034 if self._revisioncache:
2035 if self._revisioncache[0] == node:
2035 if self._revisioncache[0] == node:
2036 return (rev, self._revisioncache[2], True)
2036 return (rev, self._revisioncache[2], True)
2037 cachedrev = self._revisioncache[1]
2037 cachedrev = self._revisioncache[1]
2038
2038
2039 if rev is None:
2039 if rev is None:
2040 rev = self.rev(node)
2040 rev = self.rev(node)
2041
2041
2042 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2042 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2043 if stopped:
2043 if stopped:
2044 basetext = self._revisioncache[2]
2044 basetext = self._revisioncache[2]
2045
2045
2046 # drop cache to save memory, the caller is expected to
2046 # drop cache to save memory, the caller is expected to
2047 # update self._revisioncache after validating the text
2047 # update self._revisioncache after validating the text
2048 self._revisioncache = None
2048 self._revisioncache = None
2049
2049
2050 targetsize = None
2050 targetsize = None
2051 rawsize = self.index[rev][2]
2051 rawsize = self.index[rev][2]
2052 if 0 <= rawsize:
2052 if 0 <= rawsize:
2053 targetsize = 4 * rawsize
2053 targetsize = 4 * rawsize
2054
2054
2055 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2055 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2056 if basetext is None:
2056 if basetext is None:
2057 basetext = bytes(bins[0])
2057 basetext = bytes(bins[0])
2058 bins = bins[1:]
2058 bins = bins[1:]
2059
2059
2060 rawtext = mdiff.patches(basetext, bins)
2060 rawtext = mdiff.patches(basetext, bins)
2061 del basetext # let us have a chance to free memory early
2061 del basetext # let us have a chance to free memory early
2062 return (rev, rawtext, False)
2062 return (rev, rawtext, False)
2063
2063
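Reconstruction walks the delta chain oldest-first: the first chunk (or the cached revision, when the walk stopped there) is the base text and everything after it is a patch. A standalone sketch of that final step, with ``patch_all`` standing in for mdiff.patches (illustrative):

def rebuild_rawtext(chain_chunks, patch_all, cached_base=None):
    # chain_chunks: decompressed chunks for the chain, oldest first
    if cached_base is None:
        base = bytes(chain_chunks[0])   # chain starts at a snapshot
        deltas = chain_chunks[1:]
    else:
        base = cached_base              # walk stopped at the cache
        deltas = chain_chunks
    return patch_all(base, deltas)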
2064 def _sidedata(self, rev):
2064 def _sidedata(self, rev):
2065 """Return the sidedata for a given revision number."""
2065 """Return the sidedata for a given revision number."""
2066 index_entry = self.index[rev]
2066 index_entry = self.index[rev]
2067 sidedata_offset = index_entry[8]
2067 sidedata_offset = index_entry[8]
2068 sidedata_size = index_entry[9]
2068 sidedata_size = index_entry[9]
2069
2069
2070 if self._inline:
2070 if self._inline:
2071 sidedata_offset += self.index.entry_size * (1 + rev)
2071 sidedata_offset += self.index.entry_size * (1 + rev)
2072 if sidedata_size == 0:
2072 if sidedata_size == 0:
2073 return {}
2073 return {}
2074
2074
2075 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2075 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2076 filename = self._sidedatafile
2076 filename = self._sidedatafile
2077 end = self._docket.sidedata_end
2077 end = self._docket.sidedata_end
2078 offset = sidedata_offset
2078 offset = sidedata_offset
2079 length = sidedata_size
2079 length = sidedata_size
2080 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2080 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2081 raise error.RevlogError(m)
2081 raise error.RevlogError(m)
2082
2082
2083 comp_segment = self._segmentfile_sidedata.read_chunk(
2083 comp_segment = self._segmentfile_sidedata.read_chunk(
2084 sidedata_offset, sidedata_size
2084 sidedata_offset, sidedata_size
2085 )
2085 )
2086
2086
2087 comp = self.index[rev][11]
2087 comp = self.index[rev][11]
2088 if comp == COMP_MODE_PLAIN:
2088 if comp == COMP_MODE_PLAIN:
2089 segment = comp_segment
2089 segment = comp_segment
2090 elif comp == COMP_MODE_DEFAULT:
2090 elif comp == COMP_MODE_DEFAULT:
2091 segment = self._decompressor(comp_segment)
2091 segment = self._decompressor(comp_segment)
2092 elif comp == COMP_MODE_INLINE:
2092 elif comp == COMP_MODE_INLINE:
2093 segment = self.decompress(comp_segment)
2093 segment = self.decompress(comp_segment)
2094 else:
2094 else:
2095 msg = b'unknown compression mode %d'
2095 msg = b'unknown compression mode %d'
2096 msg %= comp
2096 msg %= comp
2097 raise error.RevlogError(msg)
2097 raise error.RevlogError(msg)
2098
2098
2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2099 sidedata = sidedatautil.deserialize_sidedata(segment)
2100 return sidedata
2100 return sidedata
2101
2101
2102 def rawdata(self, nodeorrev, _df=None):
2102 def rawdata(self, nodeorrev):
2103 """return an uncompressed raw data of a given node or revision number.
2103 """return an uncompressed raw data of a given node or revision number."""
2104
2104 return self._revisiondata(nodeorrev, raw=True)
2105 _df - an existing file handle to read from. (internal-only)
2106 """
2107 return self._revisiondata(nodeorrev, _df, raw=True)
2108
2105
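The hunk above is the point of this changeset: ``rawdata()`` loses its ``_df`` file-handle parameter. Call sites no longer thread an open data file through; keeping files open across many reads is instead the job of the ``reading()`` context manager defined further down. A hedged sketch of the caller-side difference (variable names illustrative):

# before this change, callers could pass a pre-opened handle:
#     raw = rl.rawdata(rev, _df=fh)
# after it, the revlog manages its own handles:
with rl.reading():             # keeps data/sidedata files open
    for rev in revs:
        raw = rl.rawdata(rev)  # no file-handle plumbing per call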
2109 def hash(self, text, p1, p2):
2106 def hash(self, text, p1, p2):
2110 """Compute a node hash.
2107 """Compute a node hash.
2111
2108
2112 Available as a function so that subclasses can replace the hash
2109 Available as a function so that subclasses can replace the hash
2113 as needed.
2110 as needed.
2114 """
2111 """
2115 return storageutil.hashrevisionsha1(text, p1, p2)
2112 return storageutil.hashrevisionsha1(text, p1, p2)
2116
2113
2117 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2114 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2118 """Check node hash integrity.
2115 """Check node hash integrity.
2119
2116
2120 Available as a function so that subclasses can extend hash mismatch
2117 Available as a function so that subclasses can extend hash mismatch
2121 behaviors as needed.
2118 behaviors as needed.
2122 """
2119 """
2123 try:
2120 try:
2124 if p1 is None and p2 is None:
2121 if p1 is None and p2 is None:
2125 p1, p2 = self.parents(node)
2122 p1, p2 = self.parents(node)
2126 if node != self.hash(text, p1, p2):
2123 if node != self.hash(text, p1, p2):
2127 # Clear the revision cache on hash failure. The revision cache
2124 # Clear the revision cache on hash failure. The revision cache
2128 # only stores the raw revision and clearing the cache does have
2125 # only stores the raw revision and clearing the cache does have
2129 # the side-effect that we won't have a cache hit when the raw
2126 # the side-effect that we won't have a cache hit when the raw
2130 # revision data is accessed. But this case should be rare and
2127 # revision data is accessed. But this case should be rare and
2131 # it is extra work to teach the cache about the hash
2128 # it is extra work to teach the cache about the hash
2132 # verification state.
2129 # verification state.
2133 if self._revisioncache and self._revisioncache[0] == node:
2130 if self._revisioncache and self._revisioncache[0] == node:
2134 self._revisioncache = None
2131 self._revisioncache = None
2135
2132
2136 revornode = rev
2133 revornode = rev
2137 if revornode is None:
2134 if revornode is None:
2138 revornode = templatefilters.short(hex(node))
2135 revornode = templatefilters.short(hex(node))
2139 raise error.RevlogError(
2136 raise error.RevlogError(
2140 _(b"integrity check failed on %s:%s")
2137 _(b"integrity check failed on %s:%s")
2141 % (self.display_id, pycompat.bytestr(revornode))
2138 % (self.display_id, pycompat.bytestr(revornode))
2142 )
2139 )
2143 except error.RevlogError:
2140 except error.RevlogError:
2144 if self._censorable and storageutil.iscensoredtext(text):
2141 if self._censorable and storageutil.iscensoredtext(text):
2145 raise error.CensoredNodeError(self.display_id, node, text)
2142 raise error.CensoredNodeError(self.display_id, node, text)
2146 raise
2143 raise
2147
2144
2148 @property
2145 @property
2149 def _split_index_file(self):
2146 def _split_index_file(self):
2150 """the path where to expect the index of an ongoing splitting operation
2147 """the path where to expect the index of an ongoing splitting operation
2151
2148
2152 The file will only exist if a splitting operation is in progress, but
2149 The file will only exist if a splitting operation is in progress, but
2153 it is always expected at the same location."""
2150 it is always expected at the same location."""
2154 parts = self.radix.split(b'/')
2151 parts = self.radix.split(b'/')
2155 if len(parts) > 1:
2152 if len(parts) > 1:
2156 # adds a '-s' suffix to the `data/` or `meta/` base directory name
2153 # adds a '-s' suffix to the `data/` or `meta/` base directory name
2157 head = parts[0] + b'-s'
2154 head = parts[0] + b'-s'
2158 mids = parts[1:-1]
2155 mids = parts[1:-1]
2159 tail = parts[-1] + b'.i'
2156 tail = parts[-1] + b'.i'
2160 pieces = [head] + mids + [tail]
2157 pieces = [head] + mids + [tail]
2161 return b'/'.join(pieces)
2158 return b'/'.join(pieces)
2162 else:
2159 else:
2163 # the revlog is stored at the root of the store (changelog or
2160 # the revlog is stored at the root of the store (changelog or
2164 # manifest), no risk of collision.
2161 # manifest), no risk of collision.
2165 return self.radix + b'.i.s'
2162 return self.radix + b'.i.s'
2166
2163
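Worked examples of the path rule above, as a runnable standalone copy of the string logic (the radix values are made up for illustration):

def split_index_file(radix):
    parts = radix.split(b'/')
    if len(parts) > 1:
        head = parts[0] + b'-s'        # 'data' -> 'data-s'
        return b'/'.join([head] + parts[1:-1] + [parts[-1] + b'.i'])
    return radix + b'.i.s'             # store-root revlogs

assert split_index_file(b'data/some/file') == b'data-s/some/file.i'
assert split_index_file(b'00changelog') == b'00changelog.i.s'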
2167 def _enforceinlinesize(self, tr, side_write=True):
2164 def _enforceinlinesize(self, tr, side_write=True):
2168 """Check if the revlog is too big for inline and convert if so.
2165 """Check if the revlog is too big for inline and convert if so.
2169
2166
2170 This should be called after revisions are added to the revlog. If the
2167 This should be called after revisions are added to the revlog. If the
2171 revlog has grown too large to be an inline revlog, it will convert it
2168 revlog has grown too large to be an inline revlog, it will convert it
2172 to use multiple index and data files.
2169 to use multiple index and data files.
2173 """
2170 """
2174 tiprev = len(self) - 1
2171 tiprev = len(self) - 1
2175 total_size = self.start(tiprev) + self.length(tiprev)
2172 total_size = self.start(tiprev) + self.length(tiprev)
2176 if not self._inline or total_size < _maxinline:
2173 if not self._inline or total_size < _maxinline:
2177 return
2174 return
2178
2175
2179 troffset = tr.findoffset(self._indexfile)
2176 troffset = tr.findoffset(self._indexfile)
2180 if troffset is None:
2177 if troffset is None:
2181 raise error.RevlogError(
2178 raise error.RevlogError(
2182 _(b"%s not found in the transaction") % self._indexfile
2179 _(b"%s not found in the transaction") % self._indexfile
2183 )
2180 )
2184 if troffset:
2181 if troffset:
2185 tr.addbackup(self._indexfile, for_offset=True)
2182 tr.addbackup(self._indexfile, for_offset=True)
2186 tr.add(self._datafile, 0)
2183 tr.add(self._datafile, 0)
2187
2184
2188 existing_handles = False
2185 existing_handles = False
2189 if self._writinghandles is not None:
2186 if self._writinghandles is not None:
2190 existing_handles = True
2187 existing_handles = True
2191 fp = self._writinghandles[0]
2188 fp = self._writinghandles[0]
2192 fp.flush()
2189 fp.flush()
2193 fp.close()
2190 fp.close()
2194 # We can't use the cached file handle after close(). So prevent
2191 # We can't use the cached file handle after close(). So prevent
2195 # its usage.
2192 # its usage.
2196 self._writinghandles = None
2193 self._writinghandles = None
2197 self._segmentfile.writing_handle = None
2194 self._segmentfile.writing_handle = None
2198 # No need to deal with sidedata writing handle as it is only
2195 # No need to deal with sidedata writing handle as it is only
2199 # relevant with revlog-v2 which is never inline, not reaching
2196 # relevant with revlog-v2 which is never inline, not reaching
2200 # this code
2197 # this code
2201 if side_write:
2198 if side_write:
2202 old_index_file_path = self._indexfile
2199 old_index_file_path = self._indexfile
2203 new_index_file_path = self._split_index_file
2200 new_index_file_path = self._split_index_file
2204 opener = self.opener
2201 opener = self.opener
2205 weak_self = weakref.ref(self)
2202 weak_self = weakref.ref(self)
2206
2203
2207 # the "split" index replace the real index when the transaction is finalized
2204 # the "split" index replace the real index when the transaction is finalized
2208 def finalize_callback(tr):
2205 def finalize_callback(tr):
2209 opener.rename(
2206 opener.rename(
2210 new_index_file_path,
2207 new_index_file_path,
2211 old_index_file_path,
2208 old_index_file_path,
2212 checkambig=True,
2209 checkambig=True,
2213 )
2210 )
2214 maybe_self = weak_self()
2211 maybe_self = weak_self()
2215 if maybe_self is not None:
2212 if maybe_self is not None:
2216 maybe_self._indexfile = old_index_file_path
2213 maybe_self._indexfile = old_index_file_path
2217
2214
2218 def abort_callback(tr):
2215 def abort_callback(tr):
2219 maybe_self = weak_self()
2216 maybe_self = weak_self()
2220 if maybe_self is not None:
2217 if maybe_self is not None:
2221 maybe_self._indexfile = old_index_file_path
2218 maybe_self._indexfile = old_index_file_path
2222
2219
2223 tr.registertmp(new_index_file_path)
2220 tr.registertmp(new_index_file_path)
2224 if self.target[1] is not None:
2221 if self.target[1] is not None:
2225 callback_id = b'000-revlog-split-%d-%s' % self.target
2222 callback_id = b'000-revlog-split-%d-%s' % self.target
2226 else:
2223 else:
2227 callback_id = b'000-revlog-split-%d' % self.target[0]
2224 callback_id = b'000-revlog-split-%d' % self.target[0]
2228 tr.addfinalize(callback_id, finalize_callback)
2225 tr.addfinalize(callback_id, finalize_callback)
2229 tr.addabort(callback_id, abort_callback)
2226 tr.addabort(callback_id, abort_callback)
2230
2227
2231 new_dfh = self._datafp(b'w+')
2228 new_dfh = self._datafp(b'w+')
2232 new_dfh.truncate(0) # drop any potentially existing data
2229 new_dfh.truncate(0) # drop any potentially existing data
2233 try:
2230 try:
2234 with self._indexfp() as read_ifh:
2231 with self._indexfp() as read_ifh:
2235 for r in self:
2232 for r in self:
2236 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2233 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2237 new_dfh.flush()
2234 new_dfh.flush()
2238
2235
2239 if side_write:
2236 if side_write:
2240 self._indexfile = new_index_file_path
2237 self._indexfile = new_index_file_path
2241 with self.__index_new_fp() as fp:
2238 with self.__index_new_fp() as fp:
2242 self._format_flags &= ~FLAG_INLINE_DATA
2239 self._format_flags &= ~FLAG_INLINE_DATA
2243 self._inline = False
2240 self._inline = False
2244 for i in self:
2241 for i in self:
2245 e = self.index.entry_binary(i)
2242 e = self.index.entry_binary(i)
2246 if i == 0 and self._docket is None:
2243 if i == 0 and self._docket is None:
2247 header = self._format_flags | self._format_version
2244 header = self._format_flags | self._format_version
2248 header = self.index.pack_header(header)
2245 header = self.index.pack_header(header)
2249 e = header + e
2246 e = header + e
2250 fp.write(e)
2247 fp.write(e)
2251 if self._docket is not None:
2248 if self._docket is not None:
2252 self._docket.index_end = fp.tell()
2249 self._docket.index_end = fp.tell()
2253
2250
2254 # If we don't use side-write, the temp file replaces the real
2251 # If we don't use side-write, the temp file replaces the real
2255 # index when we exit the context manager
2252 # index when we exit the context manager
2256
2253
2257 nodemaputil.setup_persistent_nodemap(tr, self)
2254 nodemaputil.setup_persistent_nodemap(tr, self)
2258 self._segmentfile = randomaccessfile.randomaccessfile(
2255 self._segmentfile = randomaccessfile.randomaccessfile(
2259 self.opener,
2256 self.opener,
2260 self._datafile,
2257 self._datafile,
2261 self._chunkcachesize,
2258 self._chunkcachesize,
2262 )
2259 )
2263
2260
2264 if existing_handles:
2261 if existing_handles:
2265 # switched from inline to conventional; reopen the index
2262 # switched from inline to conventional; reopen the index
2266 ifh = self.__index_write_fp()
2263 ifh = self.__index_write_fp()
2267 self._writinghandles = (ifh, new_dfh, None)
2264 self._writinghandles = (ifh, new_dfh, None)
2268 self._segmentfile.writing_handle = new_dfh
2265 self._segmentfile.writing_handle = new_dfh
2269 new_dfh = None
2266 new_dfh = None
2270 # No need to deal with sidedata writing handle as it is only
2267 # No need to deal with sidedata writing handle as it is only
2271 # relevant with revlog-v2 which is never inline, not reaching
2268 # relevant with revlog-v2 which is never inline, not reaching
2272 # this code
2269 # this code
2273 finally:
2270 finally:
2274 if new_dfh is not None:
2271 if new_dfh is not None:
2275 new_dfh.close()
2272 new_dfh.close()
2276
2273
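The conversion trigger in isolation: the revlog's total data size is the end offset of its tip revision, and the split happens once that crosses the inline ceiling (128 KiB in current Mercurial's ``_maxinline``, but treat the constant here as an assumption):

def needs_split(inline, start, length, tiprev, maxinline=131072):
    if tiprev < 0:                      # empty revlog
        return False
    total_size = start(tiprev) + length(tiprev)
    return inline and total_size >= maxinline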
2277 def _nodeduplicatecallback(self, transaction, node):
2274 def _nodeduplicatecallback(self, transaction, node):
2278 """called when trying to add a node already stored."""
2275 """called when trying to add a node already stored."""
2279
2276
2280 @contextlib.contextmanager
2277 @contextlib.contextmanager
2281 def reading(self):
2278 def reading(self):
2282 """Context manager that keeps data and sidedata files open for reading"""
2279 """Context manager that keeps data and sidedata files open for reading"""
2283 if len(self.index) == 0:
2280 if len(self.index) == 0:
2284 yield # nothing to be read
2281 yield # nothing to be read
2285 else:
2282 else:
2286 with self._segmentfile.reading():
2283 with self._segmentfile.reading():
2287 with self._segmentfile_sidedata.reading():
2284 with self._segmentfile_sidedata.reading():
2288 yield
2285 yield
2289
2286
2290 @contextlib.contextmanager
2287 @contextlib.contextmanager
2291 def _writing(self, transaction):
2288 def _writing(self, transaction):
2292 if self._trypending:
2289 if self._trypending:
2293 msg = b'try to write in a `trypending` revlog: %s'
2290 msg = b'try to write in a `trypending` revlog: %s'
2294 msg %= self.display_id
2291 msg %= self.display_id
2295 raise error.ProgrammingError(msg)
2292 raise error.ProgrammingError(msg)
2296 if self._writinghandles is not None:
2293 if self._writinghandles is not None:
2297 yield
2294 yield
2298 else:
2295 else:
2299 ifh = dfh = sdfh = None
2296 ifh = dfh = sdfh = None
2300 try:
2297 try:
2301 r = len(self)
2298 r = len(self)
2302 # opening the data file.
2299 # opening the data file.
2303 dsize = 0
2300 dsize = 0
2304 if r:
2301 if r:
2305 dsize = self.end(r - 1)
2302 dsize = self.end(r - 1)
2306 dfh = None
2303 dfh = None
2307 if not self._inline:
2304 if not self._inline:
2308 try:
2305 try:
2309 dfh = self._datafp(b"r+")
2306 dfh = self._datafp(b"r+")
2310 if self._docket is None:
2307 if self._docket is None:
2311 dfh.seek(0, os.SEEK_END)
2308 dfh.seek(0, os.SEEK_END)
2312 else:
2309 else:
2313 dfh.seek(self._docket.data_end, os.SEEK_SET)
2310 dfh.seek(self._docket.data_end, os.SEEK_SET)
2314 except FileNotFoundError:
2311 except FileNotFoundError:
2315 dfh = self._datafp(b"w+")
2312 dfh = self._datafp(b"w+")
2316 transaction.add(self._datafile, dsize)
2313 transaction.add(self._datafile, dsize)
2317 if self._sidedatafile is not None:
2314 if self._sidedatafile is not None:
2318 # revlog-v2 does not inline, help Pytype
2315 # revlog-v2 does not inline, help Pytype
2319 assert dfh is not None
2316 assert dfh is not None
2320 try:
2317 try:
2321 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2318 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2322 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2319 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2323 except FileNotFoundError:
2320 except FileNotFoundError:
2324 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2321 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2325 transaction.add(
2322 transaction.add(
2326 self._sidedatafile, self._docket.sidedata_end
2323 self._sidedatafile, self._docket.sidedata_end
2327 )
2324 )
2328
2325
2329 # opening the index file.
2326 # opening the index file.
2330 isize = r * self.index.entry_size
2327 isize = r * self.index.entry_size
2331 ifh = self.__index_write_fp()
2328 ifh = self.__index_write_fp()
2332 if self._inline:
2329 if self._inline:
2333 transaction.add(self._indexfile, dsize + isize)
2330 transaction.add(self._indexfile, dsize + isize)
2334 else:
2331 else:
2335 transaction.add(self._indexfile, isize)
2332 transaction.add(self._indexfile, isize)
2336 # expose all file handles for writing.
2333 # expose all file handles for writing.
2337 self._writinghandles = (ifh, dfh, sdfh)
2334 self._writinghandles = (ifh, dfh, sdfh)
2338 self._segmentfile.writing_handle = ifh if self._inline else dfh
2335 self._segmentfile.writing_handle = ifh if self._inline else dfh
2339 self._segmentfile_sidedata.writing_handle = sdfh
2336 self._segmentfile_sidedata.writing_handle = sdfh
2340 yield
2337 yield
2341 if self._docket is not None:
2338 if self._docket is not None:
2342 self._write_docket(transaction)
2339 self._write_docket(transaction)
2343 finally:
2340 finally:
2344 self._writinghandles = None
2341 self._writinghandles = None
2345 self._segmentfile.writing_handle = None
2342 self._segmentfile.writing_handle = None
2346 self._segmentfile_sidedata.writing_handle = None
2343 self._segmentfile_sidedata.writing_handle = None
2347 if dfh is not None:
2344 if dfh is not None:
2348 dfh.close()
2345 dfh.close()
2349 if sdfh is not None:
2346 if sdfh is not None:
2350 sdfh.close()
2347 sdfh.close()
2351 # closing the index file last to avoid exposing references to
2348 # closing the index file last to avoid exposing references to
2352 # potentially unflushed data content.
2349 # potentially unflushed data content.
2353 if ifh is not None:
2350 if ifh is not None:
2354 ifh.close()
2351 ifh.close()
2355
2352
2356 def _write_docket(self, transaction):
2353 def _write_docket(self, transaction):
2357 """write the current docket on disk
2354 """write the current docket on disk
2358
2355
2359 Exists as a method to help the changelog implement transaction logic
2356 Exists as a method to help the changelog implement transaction logic
2360
2357
2361 We could also imagine using the same transaction logic for all revlogs
2358 We could also imagine using the same transaction logic for all revlogs
2362 since dockets are cheap."""
2359 since dockets are cheap."""
2363 self._docket.write(transaction)
2360 self._docket.write(transaction)
2364
2361
2365 def addrevision(
2362 def addrevision(
2366 self,
2363 self,
2367 text,
2364 text,
2368 transaction,
2365 transaction,
2369 link,
2366 link,
2370 p1,
2367 p1,
2371 p2,
2368 p2,
2372 cachedelta=None,
2369 cachedelta=None,
2373 node=None,
2370 node=None,
2374 flags=REVIDX_DEFAULT_FLAGS,
2371 flags=REVIDX_DEFAULT_FLAGS,
2375 deltacomputer=None,
2372 deltacomputer=None,
2376 sidedata=None,
2373 sidedata=None,
2377 ):
2374 ):
2378 """add a revision to the log
2375 """add a revision to the log
2379
2376
2380 text - the revision data to add
2377 text - the revision data to add
2381 transaction - the transaction object used for rollback
2378 transaction - the transaction object used for rollback
2382 link - the linkrev data to add
2379 link - the linkrev data to add
2383 p1, p2 - the parent nodeids of the revision
2380 p1, p2 - the parent nodeids of the revision
2384 cachedelta - an optional precomputed delta
2381 cachedelta - an optional precomputed delta
2385 node - nodeid of revision; typically node is not specified, and it is
2382 node - nodeid of revision; typically node is not specified, and it is
2386 computed by default as hash(text, p1, p2), however subclasses might
2383 computed by default as hash(text, p1, p2), however subclasses might
2387 use a different hashing method (and override checkhash() in that case)
2384 use a different hashing method (and override checkhash() in that case)
2388 flags - the known flags to set on the revision
2385 flags - the known flags to set on the revision
2389 deltacomputer - an optional deltacomputer instance shared between
2386 deltacomputer - an optional deltacomputer instance shared between
2390 multiple calls
2387 multiple calls
2391 """
2388 """
2392 if link == nullrev:
2389 if link == nullrev:
2393 raise error.RevlogError(
2390 raise error.RevlogError(
2394 _(b"attempted to add linkrev -1 to %s") % self.display_id
2391 _(b"attempted to add linkrev -1 to %s") % self.display_id
2395 )
2392 )
2396
2393
2397 if sidedata is None:
2394 if sidedata is None:
2398 sidedata = {}
2395 sidedata = {}
2399 elif sidedata and not self.hassidedata:
2396 elif sidedata and not self.hassidedata:
2400 raise error.ProgrammingError(
2397 raise error.ProgrammingError(
2401 _(b"trying to add sidedata to a revlog who don't support them")
2398 _(b"trying to add sidedata to a revlog who don't support them")
2402 )
2399 )
2403
2400
2404 if flags:
2401 if flags:
2405 node = node or self.hash(text, p1, p2)
2402 node = node or self.hash(text, p1, p2)
2406
2403
2407 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2404 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2408
2405
2409 # If the flag processor modifies the revision data, ignore any provided
2406 # If the flag processor modifies the revision data, ignore any provided
2410 # cachedelta.
2407 # cachedelta.
2411 if rawtext != text:
2408 if rawtext != text:
2412 cachedelta = None
2409 cachedelta = None
2413
2410
2414 if len(rawtext) > _maxentrysize:
2411 if len(rawtext) > _maxentrysize:
2415 raise error.RevlogError(
2412 raise error.RevlogError(
2416 _(
2413 _(
2417 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2414 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2418 )
2415 )
2419 % (self.display_id, len(rawtext))
2416 % (self.display_id, len(rawtext))
2420 )
2417 )
2421
2418
2422 node = node or self.hash(rawtext, p1, p2)
2419 node = node or self.hash(rawtext, p1, p2)
2423 rev = self.index.get_rev(node)
2420 rev = self.index.get_rev(node)
2424 if rev is not None:
2421 if rev is not None:
2425 return rev
2422 return rev
2426
2423
2427 if validatehash:
2424 if validatehash:
2428 self.checkhash(rawtext, node, p1=p1, p2=p2)
2425 self.checkhash(rawtext, node, p1=p1, p2=p2)
2429
2426
2430 return self.addrawrevision(
2427 return self.addrawrevision(
2431 rawtext,
2428 rawtext,
2432 transaction,
2429 transaction,
2433 link,
2430 link,
2434 p1,
2431 p1,
2435 p2,
2432 p2,
2436 node,
2433 node,
2437 flags,
2434 flags,
2438 cachedelta=cachedelta,
2435 cachedelta=cachedelta,
2439 deltacomputer=deltacomputer,
2436 deltacomputer=deltacomputer,
2440 sidedata=sidedata,
2437 sidedata=sidedata,
2441 )
2438 )
2442
2439
2443 def addrawrevision(
2440 def addrawrevision(
2444 self,
2441 self,
2445 rawtext,
2442 rawtext,
2446 transaction,
2443 transaction,
2447 link,
2444 link,
2448 p1,
2445 p1,
2449 p2,
2446 p2,
2450 node,
2447 node,
2451 flags,
2448 flags,
2452 cachedelta=None,
2449 cachedelta=None,
2453 deltacomputer=None,
2450 deltacomputer=None,
2454 sidedata=None,
2451 sidedata=None,
2455 ):
2452 ):
2456 """add a raw revision with known flags, node and parents
2453 """add a raw revision with known flags, node and parents
2457 useful when reusing a revision not stored in this revlog (ex: received
2454 useful when reusing a revision not stored in this revlog (ex: received
2458 over wire, or read from an external bundle).
2455 over wire, or read from an external bundle).
2459 """
2456 """
2460 with self._writing(transaction):
2457 with self._writing(transaction):
2461 return self._addrevision(
2458 return self._addrevision(
2462 node,
2459 node,
2463 rawtext,
2460 rawtext,
2464 transaction,
2461 transaction,
2465 link,
2462 link,
2466 p1,
2463 p1,
2467 p2,
2464 p2,
2468 flags,
2465 flags,
2469 cachedelta,
2466 cachedelta,
2470 deltacomputer=deltacomputer,
2467 deltacomputer=deltacomputer,
2471 sidedata=sidedata,
2468 sidedata=sidedata,
2472 )
2469 )
2473
2470
2474 def compress(self, data):
2471 def compress(self, data):
2475 """Generate a possibly-compressed representation of data."""
2472 """Generate a possibly-compressed representation of data."""
2476 if not data:
2473 if not data:
2477 return b'', data
2474 return b'', data
2478
2475
2479 compressed = self._compressor.compress(data)
2476 compressed = self._compressor.compress(data)
2480
2477
2481 if compressed:
2478 if compressed:
2482 # The revlog compressor added the header in the returned data.
2479 # The revlog compressor added the header in the returned data.
2483 return b'', compressed
2480 return b'', compressed
2484
2481
2485 if data[0:1] == b'\0':
2482 if data[0:1] == b'\0':
2486 return b'', data
2483 return b'', data
2487 return b'u', data
2484 return b'u', data
2488
2485
2489 def decompress(self, data):
2486 def decompress(self, data):
2490 """Decompress a revlog chunk.
2487 """Decompress a revlog chunk.
2491
2488
2492 The chunk is expected to begin with a header identifying the
2489 The chunk is expected to begin with a header identifying the
2493 format type so it can be routed to an appropriate decompressor.
2490 format type so it can be routed to an appropriate decompressor.
2494 """
2491 """
2495 if not data:
2492 if not data:
2496 return data
2493 return data
2497
2494
2498 # Revlogs are read much more frequently than they are written and many
2495 # Revlogs are read much more frequently than they are written and many
2499 # chunks only take microseconds to decompress, so performance is
2496 # chunks only take microseconds to decompress, so performance is
2500 # important here.
2497 # important here.
2501 #
2498 #
2502 # We can make a few assumptions about revlogs:
2499 # We can make a few assumptions about revlogs:
2503 #
2500 #
2504 # 1) the majority of chunks will be compressed (as opposed to inline
2501 # 1) the majority of chunks will be compressed (as opposed to inline
2505 # raw data).
2502 # raw data).
2506 # 2) decompressing *any* data will likely be at least 10x slower than
2503 # 2) decompressing *any* data will likely be at least 10x slower than
2507 # returning raw inline data.
2504 # returning raw inline data.
2508 # 3) we want to prioritize common and officially supported compression
2505 # 3) we want to prioritize common and officially supported compression
2509 # engines
2506 # engines
2510 #
2507 #
2511 # It follows that we want to optimize for "decompress compressed data
2508 # It follows that we want to optimize for "decompress compressed data
2512 # when encoded with common and officially supported compression engines"
2509 # when encoded with common and officially supported compression engines"
2513 # case over "raw data" and "data encoded by less common or non-official
2510 # case over "raw data" and "data encoded by less common or non-official
2514 # compression engines." That is why we have the inline lookup first
2511 # compression engines." That is why we have the inline lookup first
2515 # followed by the compengines lookup.
2512 # followed by the compengines lookup.
2516 #
2513 #
2517 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2514 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2518 # compressed chunks. And this matters for changelog and manifest reads.
2515 # compressed chunks. And this matters for changelog and manifest reads.
2519 t = data[0:1]
2516 t = data[0:1]
2520
2517
2521 if t == b'x':
2518 if t == b'x':
2522 try:
2519 try:
2523 return _zlibdecompress(data)
2520 return _zlibdecompress(data)
2524 except zlib.error as e:
2521 except zlib.error as e:
2525 raise error.RevlogError(
2522 raise error.RevlogError(
2526 _(b'revlog decompress error: %s')
2523 _(b'revlog decompress error: %s')
2527 % stringutil.forcebytestr(e)
2524 % stringutil.forcebytestr(e)
2528 )
2525 )
2529 # '\0' is more common than 'u' so it goes first.
2526 # '\0' is more common than 'u' so it goes first.
2530 elif t == b'\0':
2527 elif t == b'\0':
2531 return data
2528 return data
2532 elif t == b'u':
2529 elif t == b'u':
2533 return util.buffer(data, 1)
2530 return util.buffer(data, 1)
2534
2531
2535 compressor = self._get_decompressor(t)
2532 compressor = self._get_decompressor(t)
2536
2533
2537 return compressor.decompress(data)
2534 return compressor.decompress(data)
2538
2535
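``compress()`` and ``decompress()`` share a one-byte header convention: ``'u'`` prefixes data stored uncompressed, a leading ``'\0'`` means raw bytes (texts that already start with NUL need no marker), and any other byte names a compression engine, ``'x'`` being zlib. A runnable sketch of the common cases; the real method falls back to a pluggable engine lookup instead of raising:

import zlib

def decompress_chunk(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':        # zlib stream (its header starts with 0x78 == 'x')
        return zlib.decompress(data)
    if t == b'\0':       # stored raw; NUL cannot start a header
        return data
    if t == b'u':        # explicit "uncompressed" marker, strip it
        return data[1:]
    raise ValueError('unknown chunk header %r' % t)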
2539 def _addrevision(
2536 def _addrevision(
2540 self,
2537 self,
2541 node,
2538 node,
2542 rawtext,
2539 rawtext,
2543 transaction,
2540 transaction,
2544 link,
2541 link,
2545 p1,
2542 p1,
2546 p2,
2543 p2,
2547 flags,
2544 flags,
2548 cachedelta,
2545 cachedelta,
2549 alwayscache=False,
2546 alwayscache=False,
2550 deltacomputer=None,
2547 deltacomputer=None,
2551 sidedata=None,
2548 sidedata=None,
2552 ):
2549 ):
2553 """internal function to add revisions to the log
2550 """internal function to add revisions to the log
2554
2551
2555 see addrevision for argument descriptions.
2552 see addrevision for argument descriptions.
2556
2553
2557 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2554 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2558
2555
2559 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2556 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2560 be used.
2557 be used.
2561
2558
2562 invariants:
2559 invariants:
2563 - rawtext is optional (can be None); if not set, cachedelta must be set.
2560 - rawtext is optional (can be None); if not set, cachedelta must be set.
2564 if both are set, they must correspond to each other.
2561 if both are set, they must correspond to each other.
2565 """
2562 """
2566 if node == self.nullid:
2563 if node == self.nullid:
2567 raise error.RevlogError(
2564 raise error.RevlogError(
2568 _(b"%s: attempt to add null revision") % self.display_id
2565 _(b"%s: attempt to add null revision") % self.display_id
2569 )
2566 )
2570 if (
2567 if (
2571 node == self.nodeconstants.wdirid
2568 node == self.nodeconstants.wdirid
2572 or node in self.nodeconstants.wdirfilenodeids
2569 or node in self.nodeconstants.wdirfilenodeids
2573 ):
2570 ):
2574 raise error.RevlogError(
2571 raise error.RevlogError(
2575 _(b"%s: attempt to add wdir revision") % self.display_id
2572 _(b"%s: attempt to add wdir revision") % self.display_id
2576 )
2573 )
2577 if self._writinghandles is None:
2574 if self._writinghandles is None:
2578 msg = b'adding revision outside `revlog._writing` context'
2575 msg = b'adding revision outside `revlog._writing` context'
2579 raise error.ProgrammingError(msg)
2576 raise error.ProgrammingError(msg)
2580
2577
2581 btext = [rawtext]
2578 btext = [rawtext]
2582
2579
2583 curr = len(self)
2580 curr = len(self)
2584 prev = curr - 1
2581 prev = curr - 1
2585
2582
2586 offset = self._get_data_offset(prev)
2583 offset = self._get_data_offset(prev)
2587
2584
2588 if self._concurrencychecker:
2585 if self._concurrencychecker:
2589 ifh, dfh, sdfh = self._writinghandles
2586 ifh, dfh, sdfh = self._writinghandles
2590 # XXX no checking for the sidedata file
2587 # XXX no checking for the sidedata file
2591 if self._inline:
2588 if self._inline:
2592 # offset is "as if" it were in the .d file, so we need to add on
2589 # offset is "as if" it were in the .d file, so we need to add on
2593 # the size of the entry metadata.
2590 # the size of the entry metadata.
2594 self._concurrencychecker(
2591 self._concurrencychecker(
2595 ifh, self._indexfile, offset + curr * self.index.entry_size
2592 ifh, self._indexfile, offset + curr * self.index.entry_size
2596 )
2593 )
2597 else:
2594 else:
2598 # Entries in the .i are a consistent size.
2595 # Entries in the .i are a consistent size.
2599 self._concurrencychecker(
2596 self._concurrencychecker(
2600 ifh, self._indexfile, curr * self.index.entry_size
2597 ifh, self._indexfile, curr * self.index.entry_size
2601 )
2598 )
2602 self._concurrencychecker(dfh, self._datafile, offset)
2599 self._concurrencychecker(dfh, self._datafile, offset)
2603
2600
2604 p1r, p2r = self.rev(p1), self.rev(p2)
2601 p1r, p2r = self.rev(p1), self.rev(p2)
2605
2602
2606 # full versions are inserted when the needed deltas
2603 # full versions are inserted when the needed deltas
2607 # become comparable to the uncompressed text
2604 # become comparable to the uncompressed text
2608 if rawtext is None:
2605 if rawtext is None:
2609 # need the rawtext size before it is changed by flag processors, which is
2606 # need the rawtext size before it is changed by flag processors, which is
2610 # the non-raw size. use revlog explicitly to avoid filelog's extra
2607 # the non-raw size. use revlog explicitly to avoid filelog's extra
2611 # logic that might remove metadata size.
2608 # logic that might remove metadata size.
2612 textlen = mdiff.patchedsize(
2609 textlen = mdiff.patchedsize(
2613 revlog.size(self, cachedelta[0]), cachedelta[1]
2610 revlog.size(self, cachedelta[0]), cachedelta[1]
2614 )
2611 )
2615 else:
2612 else:
2616 textlen = len(rawtext)
2613 textlen = len(rawtext)
2617
2614
2618 if deltacomputer is None:
2615 if deltacomputer is None:
2619 write_debug = None
2616 write_debug = None
2620 if self._debug_delta:
2617 if self._debug_delta:
2621 write_debug = transaction._report
2618 write_debug = transaction._report
2622 deltacomputer = deltautil.deltacomputer(
2619 deltacomputer = deltautil.deltacomputer(
2623 self, write_debug=write_debug
2620 self, write_debug=write_debug
2624 )
2621 )
2625
2622
2626 if cachedelta is not None and len(cachedelta) == 2:
2623 if cachedelta is not None and len(cachedelta) == 2:
2627 # If the cached delta has no information about how it should be
2624 # If the cached delta has no information about how it should be
2628 # reused, add the default reuse instruction according to the
2625 # reused, add the default reuse instruction according to the
2629 # revlog's configuration.
2626 # revlog's configuration.
2630 if self._generaldelta and self._lazydeltabase:
2627 if self._generaldelta and self._lazydeltabase:
2631 delta_base_reuse = DELTA_BASE_REUSE_TRY
2628 delta_base_reuse = DELTA_BASE_REUSE_TRY
2632 else:
2629 else:
2633 delta_base_reuse = DELTA_BASE_REUSE_NO
2630 delta_base_reuse = DELTA_BASE_REUSE_NO
2634 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2631 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2635
2632
2636 revinfo = revlogutils.revisioninfo(
2633 revinfo = revlogutils.revisioninfo(
2637 node,
2634 node,
2638 p1,
2635 p1,
2639 p2,
2636 p2,
2640 btext,
2637 btext,
2641 textlen,
2638 textlen,
2642 cachedelta,
2639 cachedelta,
2643 flags,
2640 flags,
2644 )
2641 )
2645
2642
2646 deltainfo = deltacomputer.finddeltainfo(revinfo)
2643 deltainfo = deltacomputer.finddeltainfo(revinfo)
2647
2644
2648 compression_mode = COMP_MODE_INLINE
2645 compression_mode = COMP_MODE_INLINE
2649 if self._docket is not None:
2646 if self._docket is not None:
2650 default_comp = self._docket.default_compression_header
2647 default_comp = self._docket.default_compression_header
2651 r = deltautil.delta_compression(default_comp, deltainfo)
2648 r = deltautil.delta_compression(default_comp, deltainfo)
2652 compression_mode, deltainfo = r
2649 compression_mode, deltainfo = r
2653
2650
2654 sidedata_compression_mode = COMP_MODE_INLINE
2651 sidedata_compression_mode = COMP_MODE_INLINE
2655 if sidedata and self.hassidedata:
2652 if sidedata and self.hassidedata:
2656 sidedata_compression_mode = COMP_MODE_PLAIN
2653 sidedata_compression_mode = COMP_MODE_PLAIN
2657 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2654 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2658 sidedata_offset = self._docket.sidedata_end
2655 sidedata_offset = self._docket.sidedata_end
2659 h, comp_sidedata = self.compress(serialized_sidedata)
2656 h, comp_sidedata = self.compress(serialized_sidedata)
2660 if (
2657 if (
2661 h != b'u'
2658 h != b'u'
2662 and comp_sidedata[0:1] != b'\0'
2659 and comp_sidedata[0:1] != b'\0'
2663 and len(comp_sidedata) < len(serialized_sidedata)
2660 and len(comp_sidedata) < len(serialized_sidedata)
2664 ):
2661 ):
2665 assert not h
2662 assert not h
2666 if (
2663 if (
2667 comp_sidedata[0:1]
2664 comp_sidedata[0:1]
2668 == self._docket.default_compression_header
2665 == self._docket.default_compression_header
2669 ):
2666 ):
2670 sidedata_compression_mode = COMP_MODE_DEFAULT
2667 sidedata_compression_mode = COMP_MODE_DEFAULT
2671 serialized_sidedata = comp_sidedata
2668 serialized_sidedata = comp_sidedata
2672 else:
2669 else:
2673 sidedata_compression_mode = COMP_MODE_INLINE
2670 sidedata_compression_mode = COMP_MODE_INLINE
2674 serialized_sidedata = comp_sidedata
2671 serialized_sidedata = comp_sidedata
2675 else:
2672 else:
2676 serialized_sidedata = b""
2673 serialized_sidedata = b""
2677 # Don't store the offset if the sidedata is empty, that way
2674 # Don't store the offset if the sidedata is empty, that way
2678 # we can easily detect empty sidedata, and it will be no different
2675 # we can easily detect empty sidedata, and it will be no different
2679 # from the sidedata we add manually.
2676 # from the sidedata we add manually.
2680 sidedata_offset = 0
2677 sidedata_offset = 0
2681
2678
2682 rank = RANK_UNKNOWN
2679 rank = RANK_UNKNOWN
2683 if self._compute_rank:
2680 if self._compute_rank:
2684 if (p1r, p2r) == (nullrev, nullrev):
2681 if (p1r, p2r) == (nullrev, nullrev):
2685 rank = 1
2682 rank = 1
2686 elif p1r != nullrev and p2r == nullrev:
2683 elif p1r != nullrev and p2r == nullrev:
2687 rank = 1 + self.fast_rank(p1r)
2684 rank = 1 + self.fast_rank(p1r)
2688 elif p1r == nullrev and p2r != nullrev:
2685 elif p1r == nullrev and p2r != nullrev:
2689 rank = 1 + self.fast_rank(p2r)
2686 rank = 1 + self.fast_rank(p2r)
2690 else: # merge node
2687 else: # merge node
2691 if rustdagop is not None and self.index.rust_ext_compat:
2688 if rustdagop is not None and self.index.rust_ext_compat:
2692 rank = rustdagop.rank(self.index, p1r, p2r)
2689 rank = rustdagop.rank(self.index, p1r, p2r)
2693 else:
2690 else:
2694 pmin, pmax = sorted((p1r, p2r))
2691 pmin, pmax = sorted((p1r, p2r))
2695 rank = 1 + self.fast_rank(pmax)
2692 rank = 1 + self.fast_rank(pmax)
2696 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2693 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2697
2694
2698 e = revlogutils.entry(
2695 e = revlogutils.entry(
2699 flags=flags,
2696 flags=flags,
2700 data_offset=offset,
2697 data_offset=offset,
2701 data_compressed_length=deltainfo.deltalen,
2698 data_compressed_length=deltainfo.deltalen,
2702 data_uncompressed_length=textlen,
2699 data_uncompressed_length=textlen,
2703 data_compression_mode=compression_mode,
2700 data_compression_mode=compression_mode,
2704 data_delta_base=deltainfo.base,
2701 data_delta_base=deltainfo.base,
2705 link_rev=link,
2702 link_rev=link,
2706 parent_rev_1=p1r,
2703 parent_rev_1=p1r,
2707 parent_rev_2=p2r,
2704 parent_rev_2=p2r,
2708 node_id=node,
2705 node_id=node,
2709 sidedata_offset=sidedata_offset,
2706 sidedata_offset=sidedata_offset,
2710 sidedata_compressed_length=len(serialized_sidedata),
2707 sidedata_compressed_length=len(serialized_sidedata),
2711 sidedata_compression_mode=sidedata_compression_mode,
2708 sidedata_compression_mode=sidedata_compression_mode,
2712 rank=rank,
2709 rank=rank,
2713 )
2710 )
2714
2711
2715 self.index.append(e)
2712 self.index.append(e)
2716 entry = self.index.entry_binary(curr)
2713 entry = self.index.entry_binary(curr)
2717 if curr == 0 and self._docket is None:
2714 if curr == 0 and self._docket is None:
2718 header = self._format_flags | self._format_version
2715 header = self._format_flags | self._format_version
2719 header = self.index.pack_header(header)
2716 header = self.index.pack_header(header)
2720 entry = header + entry
2717 entry = header + entry
2721 self._writeentry(
2718 self._writeentry(
2722 transaction,
2719 transaction,
2723 entry,
2720 entry,
2724 deltainfo.data,
2721 deltainfo.data,
2725 link,
2722 link,
2726 offset,
2723 offset,
2727 serialized_sidedata,
2724 serialized_sidedata,
2728 sidedata_offset,
2725 sidedata_offset,
2729 )
2726 )
2730
2727
2731 rawtext = btext[0]
2728 rawtext = btext[0]
2732
2729
2733 if alwayscache and rawtext is None:
2730 if alwayscache and rawtext is None:
2734 rawtext = deltacomputer.buildtext(revinfo)
2731 rawtext = deltacomputer.buildtext(revinfo)
2735
2732
2736 if type(rawtext) == bytes: # only accept immutable objects
2733 if type(rawtext) == bytes: # only accept immutable objects
2737 self._revisioncache = (node, curr, rawtext)
2734 self._revisioncache = (node, curr, rawtext)
2738 self._chainbasecache[curr] = deltainfo.chainbase
2735 self._chainbasecache[curr] = deltainfo.chainbase
2739 return curr
2736 return curr
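
An aside on the merge branch of the rank computation above: a revision's rank is the size of its ancestor set (itself included), so for a merge it can be derived from one parent's rank plus the revisions reachable only from the other parent. A toy model over a plain dict DAG, not Mercurial's index API:

def ancestors(dag, rev):
    """Ancestors of rev, rev included; dag maps rev -> list of parent revs."""
    seen, stack = set(), [rev]
    while stack:
        r = stack.pop()
        if r not in seen:
            seen.add(r)
            stack.extend(dag[r])
    return seen

dag = {0: [], 1: [0], 2: [0], 3: [1, 2]}  # rev 3 merges revs 1 and 2
pmin, pmax = sorted((1, 2))
missing = ancestors(dag, pmin) - ancestors(dag, pmax)  # findmissingrevs analogue
rank = 1 + len(ancestors(dag, pmax)) + len(missing)
assert rank == len(ancestors(dag, 3)) == 4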

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end
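
For illustration only, a minimal sketch of the two offset strategies the docstring above describes; the `entries` list and `Docket` class are hypothetical stand-ins, not Mercurial APIs:

class Docket:
    """Hypothetical stand-in tracking the true end of the data file."""

    def __init__(self, data_end=0):
        self.data_end = data_end

def append_offset(entries, docket=None):
    """entries: list of (start, length) pairs, one per revision."""
    if docket is None:
        # revlog < v2: the next write starts where the previous revision
        # ends, an O(1) lookup from the index
        if not entries:
            return 0
        start, length = entries[-1]
        return start + length
    # revlog v2: sidedata rewrites may have pushed the end of the data
    # file past the last revision, so trust the docket instead
    return docket.data_end

assert append_offset([(0, 10), (10, 5)]) == 15
assert append_offset([(0, 10), (10, 5)], Docket(data_end=40)) == 40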

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)
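
As a standalone illustration of the seek-before-write workaround described in the comment above (the file name and contents are arbitrary):

import os
import tempfile

# A handle reused for both reads and writes can be left positioned by a
# read; explicitly seeking to the write position before writing avoids
# the append-mode platform quirks described above.
path = os.path.join(tempfile.mkdtemp(), 'data')
with open(path, 'w+b') as fh:
    fh.write(b'abc')
    fh.seek(0)               # a read moves the cursor...
    fh.read(1)
    fh.seek(0, os.SEEK_END)  # ...so reposition before appending
    fh.write(b'def')
    fh.seek(0)
    assert fh.read() == b'abcdef'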

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty
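
Purely illustrative: the shape of a single element of ``deltas`` as unpacked in the loop above, with fabricated placeholder values:

node = b'\x11' * 20      # node id of the revision being added
p1 = b'\x22' * 20        # first parent node
p2 = b'\x00' * 20        # null second parent
linknode = b'\x33' * 20  # changelog node this revision links to
deltabase = p1           # node the delta applies against
delta = b'<binary patch bytes>'
flags = 0                # REVIDX_* flags; falsy means REVIDX_DEFAULT_FLAGS
sidedata = {}            # sidedata mapping, possibly empty

group_entry = (node, p1, p2, linknode, deltabase, delta, flags, sidedata)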

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)
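
A worked sketch of the full-replacement delta that ``addgroup`` requires over a censored base: one patch hunk whose ``>lll`` header encodes (start, end, new length), which is my reading of what ``mdiff.replacediffheader`` produces for that check:

import struct

def full_replacement_delta(oldtext, newtext):
    # a single hunk replacing bytes [0, len(oldtext)) with newtext
    header = struct.pack(b'>lll', 0, len(oldtext), len(newtext))
    return header + newtext

old = b'censored content'
delta = full_replacement_delta(old, b'tombstone')
hlen = struct.calcsize(b'>lll')
start, end, newlen = struct.unpack(b'>lll', delta[:hlen])
assert (start, end) == (0, len(old))
assert newlen == len(delta) - hlen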

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )
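
An intentionally simplified picture of what a strip point amounts to: the first rev whose linkrev is >= minlink, plus the revs swept away by the tail truncation even though their own linkrevs are fine. This is a naive linear scan, not the graph walk `resolvestripinfo` actually performs:

def naive_strippoint(linkrevs, minlink):
    """linkrevs[rev] -> linkrev of rev; returns (striprev, brokenrevs)."""
    big = [rev for rev, lr in enumerate(linkrevs) if lr >= minlink]
    striprev = big[0] if big else len(linkrevs)
    # revs removed by the truncation although their own linkrev is below
    # minlink: their linkrevs are "broken" by the strip
    broken = {
        rev
        for rev in range(striprev, len(linkrevs))
        if linkrevs[rev] < minlink
    }
    return striprev, broken

# linkrevs need not be monotonic, e.g. after exchange reordering:
print(naive_strippoint([0, 2, 1, 3], minlink=2))  # -> (1, {2})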

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]
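
A small worked example of the truncation offsets computed above, assuming a 64-byte index entry size (the real value depends on the index format):

ENTRY_SIZE = 64   # assumed index entry size
rev = 10          # first revision to strip
data_end = 4096   # byte where rev's data starts in the data file

# separate data file: the index shrinks to `rev` entries and the data
# file is cut at data_end
index_end_split = rev * ENTRY_SIZE               # 640
# inline revlog: index entries and revision data share one file, so
# both contribute to the truncation point
index_end_inline = data_end + rev * ENTRY_SIZE   # 4736
print(index_end_split, index_end_inline)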

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)
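
To make the arithmetic concrete, a toy computation of ``di`` for a non-inline index, again with an assumed entry size:

entry_size = 64
actual = 650                       # bytes found on disk
i = max(0, actual // entry_size)   # 10 whole entries fit
di = actual - i * entry_size       # 10 trailing junk bytes
assert (i, di) == (10, 10)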

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if a
        better delta base is available, it will be used. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When unset, the destination revlog's existing setting is
        kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            with self.reading():
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
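
For reference, the policy-to-flag mapping applied in the ``try`` block above, restated as a standalone table (a reading of the code: DELTAREUSEFULLADD changes neither flag and is handled in ``_clone`` instead):

# deltareuse policy -> (_lazydeltabase, _lazydelta)
POLICY_FLAGS = {
    b'always':   (True, True),    # reuse stored deltas whenever possible
    b'samerevs': (False, True),   # reuse only if the base would match
    b'never':    (False, False),  # always recompute deltas
    # b'fulladd' changes neither flag; revisions go through addrevision()
}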

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
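
The flag combination ``flags | new_flags[0] & ~new_flags[1]`` used twice above deserves a precedence note; a standalone check with arbitrary flag values:

FLAG_A, FLAG_B, FLAG_C = 1 << 0, 1 << 1, 1 << 2

flags = FLAG_A
new_flags = (FLAG_B | FLAG_C, FLAG_C)  # (bits to add, bits to remove)

# `&` binds tighter than `|`: the remove mask filters the bits being
# added, while bits already set in `flags` are left untouched.
flags = flags | new_flags[0] & ~new_flags[1]
assert flags == FLAG_A | FLAG_B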

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            #  -------------------------------------------------
            #  rawsize()    | L1     | L1     | L1    | L1
            #  size()       | L1     | L2-LM  | L1(*) | L1 (?)
            #  len(rawtext) | L2     | L2     | L2    | L2
            #  len(text)    | L2     | L2     | L2    | L3
            #  len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
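
To illustrate the length check (L1 == L2) described in the comment above, a toy verifier over (rev, claimed rawsize, rawtext) triples; the sample data is made up:

def length_problems(entries):
    """entries: iterable of (rev, claimed_rawsize, rawtext) triples."""
    for rev, l1, rawtext in entries:
        l2 = len(rawtext)  # L2: actual length of the stored raw text
        if l1 != l2:       # L1 (index rawsize) must match L2
            yield rev, 'unpacked size is %d, %d expected' % (l2, l1)

sample = [(0, 5, b'hello'), (1, 12, b'truncated')]
assert list(length_problems(sample)) == [
    (1, 'unpacked size is 9, 12 expected')
]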

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
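
Finally, a hedged standalone sketch of the compression-mode decision used above for sidedata (and earlier for revision data): keep the compressed bytes only when they are smaller and unambiguous, and record whether they start with the revlog's default compression header. The constants and the use of zlib here are illustrative stand-ins, not the revlog's actual compress() plumbing:

import zlib

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2  # illustrative

def pick_sidedata_storage(raw, default_header=b'x'):
    """Return (mode, payload) for a serialized sidedata blob."""
    if not raw:
        return COMP_MODE_PLAIN, raw
    comp = zlib.compress(raw)
    # keep compression only if it saves space and the payload cannot be
    # confused with an empty marker
    if comp[0:1] != b'\0' and len(comp) < len(raw):
        if comp[0:1] == default_header:
            return COMP_MODE_DEFAULT, comp  # header implied by the docket
        return COMP_MODE_INLINE, comp       # header stored with the chunk
    return COMP_MODE_PLAIN, raw

mode, payload = pick_sidedata_storage(b'copy: a -> b\n' * 20)
print(mode, len(payload))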