revlog: add a `get_revlog` method...
marmoute
r51530:32837c7e default
@@ -1,304 +1,313 @@
# filelog.py - file history class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from .i18n import _
from .node import nullrev
from . import (
    error,
    revlog,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .utils import storageutil
from .revlogutils import (
    constants as revlog_constants,
    rewrite,
)


@interfaceutil.implementer(repository.ifilestorage)
class filelog:
    def __init__(self, opener, path, try_split=False):
        self._revlog = revlog.revlog(
            opener,
            # XXX should use the unencoded path
            target=(revlog_constants.KIND_FILELOG, path),
            radix=b'/'.join((b'data', path)),
            censorable=True,
            canonical_parent_order=False,  # see comment in revlog.py
            try_split=try_split,
        )
        # Full name of the user visible file, relative to the repository root.
        # Used by LFS.
        self._revlog.filename = path
        self.nullid = self._revlog.nullid
        opts = opener.options
        self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)

    def get_revlog(self):
        """return an actual revlog instance if any

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving a simple
        way to access the revlog instance helps such code.
        """
        return self._revlog

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def hasnode(self, node):
        if node in (self.nullid, nullrev):
            return False

        try:
            self._revlog.rev(node)
            return True
        except (TypeError, ValueError, IndexError, error.LookupError):
            return False

    def revs(self, start=0, stop=None):
        return self._revlog.revs(start=start, stop=stop)

    def parents(self, node):
        return self._revlog.parents(node)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, node):
        return storageutil.fileidlookup(
            self._revlog, node, self._revlog.display_id
        )

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def commonancestorsheads(self, node1, node2):
        return self._revlog.commonancestorsheads(node1, node2)

    # Used by dagop.blockdescendants().
    def descendants(self, revs):
        return self._revlog.descendants(revs)

    def heads(self, start=None, stop=None):
        return self._revlog.heads(start, stop)

    # Used by hgweb, children extension.
    def children(self, node):
        return self._revlog.children(node)

    def iscensored(self, rev):
        return self._revlog.iscensored(rev)

    def revision(self, node, _df=None):
        return self._revlog.revision(node, _df=_df)

    def rawdata(self, node, _df=None):
        return self._revlog.rawdata(node, _df=_df)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    def addrevision(
        self,
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=revlog.REVIDX_DEFAULT_FLAGS,
        cachedelta=None,
    ):
        return self._revlog.addrevision(
            revisiondata,
            transaction,
            linkrev,
            p1,
            p2,
            node=node,
            flags=flags,
            cachedelta=cachedelta,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        if maybemissingparents:
            raise error.Abort(
                _(
                    b'revlog storage does not support missing '
                    b'parents write mode'
                )
            )

        with self._revlog._writing(transaction):

            if self._fix_issue6528:
                deltas = rewrite.filter_delta_issue6528(self._revlog, deltas)

            return self._revlog.addgroup(
                deltas,
                linkmapper,
                transaction,
                addrevisioncb=addrevisioncb,
                duplicaterevisioncb=duplicaterevisioncb,
                debug_info=debug_info,
                delta_base_reuse_policy=delta_base_reuse_policy,
            )

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def censorrevision(self, tr, node, tombstone=b''):
        return self._revlog.censorrevision(tr, node, tombstone=tombstone)

    def files(self):
        return self._revlog.files()

    def read(self, node):
        return storageutil.filtermetadata(self.revision(node))

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        if meta or text.startswith(b'\1\n'):
            text = storageutil.packmeta(meta, text)
        rev = self.addrevision(text, transaction, link, p1, p2)
        return self.node(rev)

    def renamed(self, node):
        return storageutil.filerevisioncopied(self, node)

    def size(self, rev):
        """return the size of a given revision"""

        # for revisions with renames, we have to go the slow way
        node = self.node(rev)
        if self.iscensored(rev):
            return 0
        if self.renamed(node):
            return len(self.read(node))

        # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
        # XXX See also basefilectx.cmp.
        return self._revlog.size(rev)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        return not storageutil.filedataequivalent(self, node, text)

    def verifyintegrity(self, state):
        return self._revlog.verifyintegrity(state)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    # Used by repo upgrade.
    def clone(self, tr, destrevlog, **kwargs):
        if not isinstance(destrevlog, filelog):
            msg = b'expected filelog to clone(), not %r'
            msg %= destrevlog
            raise error.ProgrammingError(msg)

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)


class narrowfilelog(filelog):
    """Filelog variation to be used with narrow stores."""

    def __init__(self, opener, path, narrowmatch, try_split=False):
        super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
        self._narrowmatch = narrowmatch

    def renamed(self, node):
        res = super(narrowfilelog, self).renamed(node)

        # Renames that come from outside the narrowspec are problematic
        # because we may lack the base text for the rename. This can result
        # in code attempting to walk the ancestry or compute a diff
        # encountering a missing revision. We address this by silently
        # removing rename metadata if the source file is outside the
        # narrow spec.
        #
        # A better solution would be to see if the base revision is available,
        # rather than assuming it isn't.
        #
        # An even better solution would be to teach all consumers of rename
        # metadata that the base revision may not be available.
        #
        # TODO consider better ways of doing this.
        if res and not self._narrowmatch(res[0]):
            return None

        return res

    def size(self, rev):
        # Because we have a custom renamed() that may lie, we need to call
        # the base renamed() to report accurate results.
        node = self.node(rev)
        if super(narrowfilelog, self).renamed(node):
            return len(self.read(node))
        else:
            return super(narrowfilelog, self).size(rev)

    def cmp(self, node, text):
        # We don't call `super` because narrow parents can be buggy in case
        # of an ambiguous dirstate. Always take the slow path until there is
        # a better fix, see issue6150.

        # Censored files compare against the empty file.
        if self.iscensored(self.rev(node)):
            return text != b''

        return self.read(node) != text
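
The new get_revlog() accessor gives callers that optimize around revlog internals a sanctioned entry point instead of reaching into the private _revlog attribute. A minimal usage sketch (repo, the file path, and some_node are illustrative placeholders; rev() and chainbase() are existing revlog methods):

    fl = repo.file(b'path/to/file.txt')  # a filelog instance
    rl = fl.get_revlog()                 # the backing revlog
    # revlog-only fast paths can now work on `rl` directly, e.g.:
    chain_base = rl.chainbase(rl.rev(some_node))
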
@@ -1,2065 +1,2073 @@
# repository.py - Interfaces and base classes for repositories and peers.
# coding: utf-8
#
# Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from ..i18n import _
from .. import error
from . import util as interfaceutil

# Local repository feature string.

# Revlogs are being used for file storage.
REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
# The storage part of the repository is shared from an external source.
REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
# LFS supported for backing file storage.
REPO_FEATURE_LFS = b'lfs'
# Repository supports being stream cloned.
REPO_FEATURE_STREAM_CLONE = b'streamclone'
# Repository supports (at least) some sidedata to be stored
REPO_FEATURE_SIDE_DATA = b'side-data'
# Files storage may lack data for all ancestors.
REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'

REVISION_FLAG_CENSORED = 1 << 15
REVISION_FLAG_ELLIPSIS = 1 << 14
REVISION_FLAG_EXTSTORED = 1 << 13
REVISION_FLAG_HASCOPIESINFO = 1 << 12

REVISION_FLAGS_KNOWN = (
    REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO
)

CG_DELTAMODE_STD = b'default'
CG_DELTAMODE_PREV = b'previous'
CG_DELTAMODE_FULL = b'fulltext'
CG_DELTAMODE_P1 = b'p1'


## Cache related constants:
#
# Used to control which cache should be warmed in a repo.updatecaches(…) call.

# Warm branchmaps of all known repoview's filter-level
CACHE_BRANCHMAP_ALL = b"branchmap-all"
# Warm branchmaps of repoview's filter-level used by server
CACHE_BRANCHMAP_SERVED = b"branchmap-served"
# Warm internal changelog cache (eg: persistent nodemap)
CACHE_CHANGELOG_CACHE = b"changelog-cache"
# Warm full manifest cache
CACHE_FULL_MANIFEST = b"full-manifest"
# Warm file-node-tags cache
CACHE_FILE_NODE_TAGS = b"file-node-tags"
# Warm internal manifestlog cache (eg: persistent nodemap)
CACHE_MANIFESTLOG_CACHE = b"manifestlog-cache"
# Warm rev branch cache
CACHE_REV_BRANCH = b"rev-branch-cache"
# Warm tags' cache for default repoview
CACHE_TAGS_DEFAULT = b"tags-default"
# Warm tags' cache for repoview's filter-level used by server
CACHE_TAGS_SERVED = b"tags-served"

# the cache to warm by default after a simple transaction
# (this is a mutable set to let extension update it)
CACHES_DEFAULT = {
    CACHE_BRANCHMAP_SERVED,
}

# the caches to warm when warming all of them
# (this is a mutable set to let extension update it)
CACHES_ALL = {
    CACHE_BRANCHMAP_SERVED,
    CACHE_BRANCHMAP_ALL,
    CACHE_CHANGELOG_CACHE,
    CACHE_FILE_NODE_TAGS,
    CACHE_FULL_MANIFEST,
    CACHE_MANIFESTLOG_CACHE,
    CACHE_TAGS_DEFAULT,
    CACHE_TAGS_SERVED,
}

# the cache to warm by default on simple call
# (this is a mutable set to let extension update it)
CACHES_POST_CLONE = CACHES_ALL.copy()
CACHES_POST_CLONE.discard(CACHE_FILE_NODE_TAGS)

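# Since these cache sets are intentionally mutable, an extension can opt its
# own cache into the warming sets. A minimal sketch (b"my-ext-cache" is a
# hypothetical cache name, not one defined by Mercurial):
#
#     from mercurial.interfaces import repository
#
#     repository.CACHES_DEFAULT.add(b"my-ext-cache")
#     repository.CACHES_ALL.add(b"my-ext-cache")
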
class ipeerconnection(interfaceutil.Interface):
    """Represents a "connection" to a repository.

    This is the base interface for representing a connection to a repository.
    It holds basic properties and methods applicable to all peer types.

    This is not a complete interface definition and should not be used
    outside of this module.
    """

    ui = interfaceutil.Attribute("""ui.ui instance""")
    path = interfaceutil.Attribute("""a urlutil.path instance or None""")

    def url():
        """Returns a URL string representing this peer.

        Currently, implementations expose the raw URL used to construct the
        instance. It may contain credentials as part of the URL. The
        expectations of the value aren't well-defined and this could lead to
        data leakage.

        TODO audit/clean consumers and more clearly define the contents of this
        value.
        """

    def local():
        """Returns a local repository instance.

        If the peer represents a local repository, returns an object that
        can be used to interface with it. Otherwise returns ``None``.
        """

    def canpush():
        """Returns a boolean indicating if this peer can be pushed to."""

    def close():
        """Close the connection to this peer.

        This is called when the peer will no longer be used. Resources
        associated with the peer should be cleaned up.
        """


class ipeercapabilities(interfaceutil.Interface):
    """Peer sub-interface related to capabilities."""

    def capable(name):
        """Determine support for a named capability.

        Returns ``False`` if capability not supported.

        Returns ``True`` if boolean capability is supported. Returns a string
        if capability support is non-boolean.

        Capability strings may or may not map to wire protocol capabilities.
        """

    def requirecap(name, purpose):
        """Require a capability to be present.

        Raises a ``CapabilityError`` if the capability isn't present.
        """


class ipeercommands(interfaceutil.Interface):
    """Client-side interface for communicating over the wire protocol.

    This interface is used as a gateway to the Mercurial wire protocol.
    Methods commonly call wire protocol commands of the same name.
164 """
164 """
165
165
166 def branchmap():
166 def branchmap():
167 """Obtain heads in named branches.
167 """Obtain heads in named branches.
168
168
169 Returns a dict mapping branch name to an iterable of nodes that are
169 Returns a dict mapping branch name to an iterable of nodes that are
170 heads on that branch.
170 heads on that branch.
171 """
171 """
172
172
173 def capabilities():
173 def capabilities():
174 """Obtain capabilities of the peer.
174 """Obtain capabilities of the peer.
175
175
176 Returns a set of string capabilities.
176 Returns a set of string capabilities.
177 """
177 """
178
178
179 def clonebundles():
179 def clonebundles():
180 """Obtains the clone bundles manifest for the repo.
180 """Obtains the clone bundles manifest for the repo.
181
181
182 Returns the manifest as unparsed bytes.
182 Returns the manifest as unparsed bytes.
183 """
183 """
184
184
185 def debugwireargs(one, two, three=None, four=None, five=None):
185 def debugwireargs(one, two, three=None, four=None, five=None):
186 """Used to facilitate debugging of arguments passed over the wire."""
186 """Used to facilitate debugging of arguments passed over the wire."""
187
187
188 def getbundle(source, **kwargs):
188 def getbundle(source, **kwargs):
189 """Obtain remote repository data as a bundle.
189 """Obtain remote repository data as a bundle.
190
190
        This command is how the bulk of repository data is transferred from
        the peer to the local repository.

        Returns a generator of bundle data.
        """

    def heads():
        """Determine all known head revisions in the peer.

        Returns an iterable of binary nodes.
        """

    def known(nodes):
        """Determine whether multiple nodes are known.

        Accepts an iterable of nodes whose presence to check for.

        Returns an iterable of booleans indicating whether the corresponding
        node at that index is known to the peer.
210 """
210 """
211
211
212 def listkeys(namespace):
212 def listkeys(namespace):
213 """Obtain all keys in a pushkey namespace.
213 """Obtain all keys in a pushkey namespace.
214
214
215 Returns an iterable of key names.
215 Returns an iterable of key names.
216 """
216 """
217
217
218 def lookup(key):
218 def lookup(key):
219 """Resolve a value to a known revision.
219 """Resolve a value to a known revision.
220
220
221 Returns a binary node of the resolved revision on success.
221 Returns a binary node of the resolved revision on success.
222 """
222 """
223
223
224 def pushkey(namespace, key, old, new):
224 def pushkey(namespace, key, old, new):
225 """Set a value using the ``pushkey`` protocol.
225 """Set a value using the ``pushkey`` protocol.
226
226
227 Arguments correspond to the pushkey namespace and key to operate on and
227 Arguments correspond to the pushkey namespace and key to operate on and
228 the old and new values for that key.
228 the old and new values for that key.
229
229
230 Returns a string with the peer result. The value inside varies by the
230 Returns a string with the peer result. The value inside varies by the
231 namespace.
231 namespace.
232 """
232 """
233
233
234 def stream_out():
234 def stream_out():
235 """Obtain streaming clone data.
235 """Obtain streaming clone data.
236
236
237 Successful result should be a generator of data chunks.
237 Successful result should be a generator of data chunks.
238 """
238 """
239
239
240 def unbundle(bundle, heads, url):
240 def unbundle(bundle, heads, url):
241 """Transfer repository data to the peer.
241 """Transfer repository data to the peer.
242
242
243 This is how the bulk of data during a push is transferred.
243 This is how the bulk of data during a push is transferred.
244
244
245 Returns the integer number of heads added to the peer.
245 Returns the integer number of heads added to the peer.
246 """
246 """
247
247
248
248
249 class ipeerlegacycommands(interfaceutil.Interface):
249 class ipeerlegacycommands(interfaceutil.Interface):
250 """Interface for implementing support for legacy wire protocol commands.
250 """Interface for implementing support for legacy wire protocol commands.
251
251
252 Wire protocol commands transition to legacy status when they are no longer
252 Wire protocol commands transition to legacy status when they are no longer
253 used by modern clients. To facilitate identifying which commands are
253 used by modern clients. To facilitate identifying which commands are
254 legacy, the interfaces are split.
254 legacy, the interfaces are split.
255 """
255 """
256
256
257 def between(pairs):
257 def between(pairs):
258 """Obtain nodes between pairs of nodes.
258 """Obtain nodes between pairs of nodes.
259
259
260 ``pairs`` is an iterable of node pairs.
260 ``pairs`` is an iterable of node pairs.
261
261
262 Returns an iterable of iterables of nodes corresponding to each
262 Returns an iterable of iterables of nodes corresponding to each
263 requested pair.
263 requested pair.
264 """
264 """
265
265
266 def branches(nodes):
266 def branches(nodes):
267 """Obtain ancestor changesets of specific nodes back to a branch point.
267 """Obtain ancestor changesets of specific nodes back to a branch point.
268
268
269 For each requested node, the peer finds the first ancestor node that is
269 For each requested node, the peer finds the first ancestor node that is
270 a DAG root or is a merge.
270 a DAG root or is a merge.
271
271
272 Returns an iterable of iterables with the resolved values for each node.
272 Returns an iterable of iterables with the resolved values for each node.
273 """
273 """
274
274
275 def changegroup(nodes, source):
275 def changegroup(nodes, source):
276 """Obtain a changegroup with data for descendants of specified nodes."""
276 """Obtain a changegroup with data for descendants of specified nodes."""
277
277
278 def changegroupsubset(bases, heads, source):
278 def changegroupsubset(bases, heads, source):
279 pass
279 pass
280
280
281
281
282 class ipeercommandexecutor(interfaceutil.Interface):
282 class ipeercommandexecutor(interfaceutil.Interface):
283 """Represents a mechanism to execute remote commands.
283 """Represents a mechanism to execute remote commands.
284
284
285 This is the primary interface for requesting that wire protocol commands
285 This is the primary interface for requesting that wire protocol commands
286 be executed. Instances of this interface are active in a context manager
286 be executed. Instances of this interface are active in a context manager
287 and have a well-defined lifetime. When the context manager exits, all
287 and have a well-defined lifetime. When the context manager exits, all
288 outstanding requests are waited on.
288 outstanding requests are waited on.
289 """
289 """
290
290
291 def callcommand(name, args):
291 def callcommand(name, args):
292 """Request that a named command be executed.
292 """Request that a named command be executed.
293
293
294 Receives the command name and a dictionary of command arguments.
294 Receives the command name and a dictionary of command arguments.
295
295
296 Returns a ``concurrent.futures.Future`` that will resolve to the
296 Returns a ``concurrent.futures.Future`` that will resolve to the
297 result of that command request. That exact value is left up to
297 result of that command request. That exact value is left up to
298 the implementation and possibly varies by command.
298 the implementation and possibly varies by command.
299
299
300 Not all commands can coexist with other commands in an executor
300 Not all commands can coexist with other commands in an executor
301 instance: it depends on the underlying wire protocol transport being
301 instance: it depends on the underlying wire protocol transport being
302 used and the command itself.
302 used and the command itself.
303
303
304 Implementations MAY call ``sendcommands()`` automatically if the
304 Implementations MAY call ``sendcommands()`` automatically if the
305 requested command can not coexist with other commands in this executor.
305 requested command can not coexist with other commands in this executor.
306
306
307 Implementations MAY call ``sendcommands()`` automatically when the
307 Implementations MAY call ``sendcommands()`` automatically when the
308 future's ``result()`` is called. So, consumers using multiple
308 future's ``result()`` is called. So, consumers using multiple
309 commands with an executor MUST ensure that ``result()`` is not called
309 commands with an executor MUST ensure that ``result()`` is not called
310 until all command requests have been issued.
310 until all command requests have been issued.
311 """
311 """
312
312
313 def sendcommands():
313 def sendcommands():
314 """Trigger submission of queued command requests.
314 """Trigger submission of queued command requests.
315
315
316 Not all transports submit commands as soon as they are requested to
316 Not all transports submit commands as soon as they are requested to
317 run. When called, this method forces queued command requests to be
317 run. When called, this method forces queued command requests to be
318 issued. It will no-op if all commands have already been sent.
318 issued. It will no-op if all commands have already been sent.
319
319
320 When called, no more new commands may be issued with this executor.
320 When called, no more new commands may be issued with this executor.
321 """
321 """
322
322
323 def close():
323 def close():
324 """Signal that this command request is finished.
324 """Signal that this command request is finished.
325
325
326 When called, no more new commands may be issued. All outstanding
326 When called, no more new commands may be issued. All outstanding
327 commands that have previously been issued are waited on before
327 commands that have previously been issued are waited on before
328 returning. This not only includes waiting for the futures to resolve,
328 returning. This not only includes waiting for the futures to resolve,
329 but also waiting for all response data to arrive. In other words,
329 but also waiting for all response data to arrive. In other words,
330 calling this waits for all on-wire state for issued command requests
330 calling this waits for all on-wire state for issued command requests
331 to finish.
331 to finish.
332
332
333 When used as a context manager, this method is called when exiting the
333 When used as a context manager, this method is called when exiting the
334 context manager.
334 context manager.
335
335
336 This method may call ``sendcommands()`` if there are buffered commands.
336 This method may call ``sendcommands()`` if there are buffered commands.
337 """
337 """
338
338
339
339
340 class ipeerrequests(interfaceutil.Interface):
340 class ipeerrequests(interfaceutil.Interface):
341 """Interface for executing commands on a peer."""
341 """Interface for executing commands on a peer."""
342
342
343 limitedarguments = interfaceutil.Attribute(
343 limitedarguments = interfaceutil.Attribute(
344 """True if the peer cannot receive large argument value for commands."""
344 """True if the peer cannot receive large argument value for commands."""
    )

    def commandexecutor():
        """A context manager that resolves to an ipeercommandexecutor.

        The object this resolves to can be used to issue command requests
        to the peer.

        Callers should call its ``callcommand`` method to issue command
        requests.

        A new executor should be obtained for each distinct set of commands
        (possibly just a single command) that the consumer wants to execute
        as part of a single operation or round trip. This is because some
        peers are half-duplex and/or don't support persistent connections.
        e.g. in the case of HTTP peers, commands sent to an executor represent
        a single HTTP request. While some peers may support multiple command
        sends over the wire per executor, consumers need to code to the least
        capable peer. So it should be assumed that command executors buffer
        called commands until they are told to send them and that each
        command executor could result in a new connection or wire-level request
        being issued.
        """

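# A minimal sketch of the executor protocol described above; the command name
# and empty argument dict are illustrative. Exiting the context manager calls
# close(), which sends buffered commands and waits for responses, so the
# future's result() is only consulted after all requests have been issued:
#
#     with peer.commandexecutor() as executor:
#         f = executor.callcommand(b'heads', {})
#     heads = f.result()
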
class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
    """Unified interface for peer repositories.

    All peer instances must conform to this interface.
    """


class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
    """Unified peer interface for wire protocol version 2 peers."""

    apidescriptor = interfaceutil.Attribute(
        """Data structure holding description of server API."""
    )


@interfaceutil.implementer(ipeerbase)
class peer:
    """Base class for peer repositories."""

    limitedarguments = False

    def __init__(self, ui, path=None, remotehidden=False):
        self.ui = ui
        self.path = path

    def capable(self, name):
        caps = self.capabilities()
        if name in caps:
            return True

        name = b'%s=' % name
        for cap in caps:
            if cap.startswith(name):
                return cap[len(name) :]

        return False

    def requirecap(self, name, purpose):
        if self.capable(name):
            return

        raise error.CapabilityError(
            _(
                b'cannot %s; remote repository does not support the '
                b'\'%s\' capability'
            )
            % (purpose, name)
        )

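# Sketch of consuming capable()'s three possible return types, as implemented
# by peer.capable() above (b'bundle2' is just an example capability name):
#
#     cap = peer.capable(b'bundle2')
#     if cap is False:
#         pass  # capability not supported
#     elif cap is True:
#         pass  # boolean capability supported
#     else:
#         pass  # non-boolean capability: `cap` holds the value after '='
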
class iverifyproblem(interfaceutil.Interface):
    """Represents a problem with the integrity of the repository.

    Instances of this interface are emitted to describe an integrity issue
    with a repository (e.g. corrupt storage, missing data, etc).

    Instances are essentially messages associated with severity.
    """

    warning = interfaceutil.Attribute(
        """Message indicating a non-fatal problem."""
    )

    error = interfaceutil.Attribute("""Message indicating a fatal problem.""")

    node = interfaceutil.Attribute(
        """Revision encountering the problem.

        ``None`` means the problem doesn't apply to a single revision.
        """
    )


class irevisiondelta(interfaceutil.Interface):
    """Represents a delta between one revision and another.

    Instances convey enough information to allow a revision to be exchanged
    with another repository.

    Instances represent the fulltext revision data or a delta against
    another revision. Therefore the ``revision`` and ``delta`` attributes
    are mutually exclusive.

    Typically used for changegroup generation.
    """

    node = interfaceutil.Attribute("""20 byte node of this revision.""")

    p1node = interfaceutil.Attribute(
        """20 byte node of 1st parent of this revision."""
    )

    p2node = interfaceutil.Attribute(
        """20 byte node of 2nd parent of this revision."""
    )

    linknode = interfaceutil.Attribute(
        """20 byte node of the changelog revision this node is linked to."""
    )

    flags = interfaceutil.Attribute(
        """2 bytes of integer flags that apply to this revision.

        This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
        """
    )

    basenode = interfaceutil.Attribute(
        """20 byte node of the revision this data is a delta against.

        ``nullid`` indicates that the revision is a full revision and not
        a delta.
        """
    )

    baserevisionsize = interfaceutil.Attribute(
        """Size of base revision this delta is against.

        May be ``None`` if ``basenode`` is ``nullid``.
        """
    )

    revision = interfaceutil.Attribute(
        """Raw fulltext of revision data for this node."""
    )

    delta = interfaceutil.Attribute(
        """Delta between ``basenode`` and ``node``.

        Stored in the bdiff delta format.
        """
    )

    sidedata = interfaceutil.Attribute(
        """Raw sidedata bytes for the given revision."""
    )

    protocol_flags = interfaceutil.Attribute(
        """Single byte of integer flags that can influence the protocol.

        This is a bitwise composition of the ``storageutil.CG_FLAG*`` constants.
        """
    )


class ifilerevisionssequence(interfaceutil.Interface):
    """Contains index data for all revisions of a file.

    Types implementing this behave like lists of tuples. The index
    in the list corresponds to the revision number. The values contain
    index metadata.

    The *null* revision (revision number -1) is always the last item
    in the index.
    """

    def __len__():
        """The total number of revisions."""

    def __getitem__(rev):
        """Returns the object having a specific revision number.

        Returns an 8-tuple with the following fields:

        offset+flags
            Contains the offset and flags for the revision. 64-bit unsigned
            integer where first 6 bytes are the offset and the next 2 bytes
            are flags. The offset can be 0 if it is not used by the store.
        compressed size
            Size of the revision data in the store. It can be 0 if it isn't
            needed by the store.
        uncompressed size
            Fulltext size. It can be 0 if it isn't needed by the store.
        base revision
            Revision number of revision the delta for storage is encoded
            against. -1 indicates not encoded against a base revision.
        link revision
            Revision number of changelog revision this entry is related to.
        p1 revision
            Revision number of 1st parent. -1 if no 1st parent.
        p2 revision
            Revision number of 2nd parent. -1 if no 2nd parent.
        node
            Binary node value for this revision number.

        Negative values should index off the end of the sequence. ``-1``
        should return the null revision. ``-2`` should return the most
        recent revision.
        """

    def __contains__(rev):
        """Whether a revision number exists."""

    def insert(self, i, entry):
        """Add an item to the index at specific revision."""

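# Sketch of consuming the 8-tuple documented in __getitem__ above (variable
# names are descriptive only; `index` stands for any ifilerevisionssequence):
#
#     (offset_flags, comp_size, uncomp_size, base_rev,
#      link_rev, p1_rev, p2_rev, node) = index[rev]
#     offset = offset_flags >> 16    # first 6 bytes
#     flags = offset_flags & 0xFFFF  # last 2 bytes
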
567 class ifileindex(interfaceutil.Interface):
567 class ifileindex(interfaceutil.Interface):
568 """Storage interface for index data of a single file.
568 """Storage interface for index data of a single file.
569
569
    File storage data is divided into index metadata and data storage.
    This interface defines the index portion of the interface.

    The index logically consists of:

    * A mapping between revision numbers and nodes.
    * DAG data (storing and querying the relationship between nodes).
    * Metadata to facilitate storage.
    """

    nullid = interfaceutil.Attribute(
        """node for the null revision for use as delta base."""
    )

    def __len__():
        """Obtain the number of revisions stored for this file."""

    def __iter__():
        """Iterate over revision numbers for this file."""

    def hasnode(node):
        """Returns a bool indicating if a node is known to this store.

        Implementations must only return True for full, binary node values:
        hex nodes, revision numbers, and partial node matches must be
        rejected.

        The null node is never present.
        """

    def revs(start=0, stop=None):
        """Iterate over revision numbers for this file, with control."""

    def parents(node):
        """Returns a 2-tuple of parent nodes for a revision.

        Values will be ``nullid`` if the parent is empty.
        """

    def parentrevs(rev):
        """Like parents() but operates on revision numbers."""

    def rev(node):
        """Obtain the revision number given a node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def node(rev):
        """Obtain the node value given a revision number.

        Raises ``IndexError`` if the node is not known.
        """

    def lookup(node):
        """Attempt to resolve a value to a node.

        Value can be a binary node, hex node, revision number, or a string
        that can be converted to an integer.

        Raises ``error.LookupError`` if a node could not be resolved.
        """

    def linkrev(rev):
        """Obtain the changeset revision number a revision is linked to."""

    def iscensored(rev):
        """Return whether a revision's content has been censored."""

    def commonancestorsheads(node1, node2):
        """Obtain an iterable of nodes containing heads of common ancestors.

        See ``ancestor.commonancestorsheads()``.
        """

    def descendants(revs):
        """Obtain descendant revision numbers for a set of revision numbers.

        If ``nullrev`` is in the set, this is equivalent to ``revs()``.
        """

    def heads(start=None, stop=None):
        """Obtain a list of nodes that are DAG heads, with control.

        The set of revisions examined can be limited by specifying
        ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
        iterable of nodes. DAG traversal starts at earlier revision
        ``start`` and iterates forward until any node in ``stop`` is
        encountered.
        """

    def children(node):
        """Obtain nodes that are children of a node.

        Returns a list of nodes.
        """

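# A minimal usage sketch, not part of the interface definitions above:
# walking the revision/node mapping and DAG data that ``ifileindex``
# exposes. ``fl`` is assumed to be any conforming object, e.g. the filelog
# returned by ``repo.file(path)``; the helper name is hypothetical.
def _dump_file_dag(fl):
    """Print each revision with a short hex node and its parent revisions."""
    from binascii import hexlify  # stdlib; avoids assuming module imports

    for rev in fl.revs():
        node = fl.node(rev)  # binary node for this revision number
        p1, p2 = fl.parentrevs(rev)  # nullrev (-1) means "no parent"
        print(rev, hexlify(node).decode('ascii')[:12], p1, p2)
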
class ifiledata(interfaceutil.Interface):
    """Storage interface for data storage of a specific file.

    This complements ``ifileindex`` and provides an interface for accessing
    data for a tracked file.
    """

    def size(rev):
        """Obtain the fulltext size of file data.

        Any metadata is excluded from size measurements.
        """

    def revision(node, raw=False):
        """Obtain fulltext data for a node.

        By default, any storage transformations are applied before the data
        is returned. If ``raw`` is True, non-raw storage transformations
        are not applied.

        The fulltext data may contain a header containing metadata. Most
        consumers should use ``read()`` to obtain the actual file data.
        """

    def rawdata(node):
        """Obtain raw data for a node."""

    def read(node):
        """Resolve file fulltext data.

        This is similar to ``revision()`` except any metadata in the data
        headers is stripped.
        """

    def renamed(node):
        """Obtain copy metadata for a node.

        Returns ``False`` if no copy metadata is stored, or a 2-tuple of
        (path, node) from which this revision was copied.
        """

    def cmp(node, fulltext):
        """Compare fulltext to another revision.

        Returns True if the fulltext is different from what is stored.

        This takes copy metadata into account.

        TODO better document the copy metadata and censoring logic.
        """

    def emitrevisions(
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=CG_DELTAMODE_STD,
    ):
        """Produce ``irevisiondelta`` for revisions.

        Given an iterable of nodes, emits objects conforming to the
        ``irevisiondelta`` interface that describe revisions in storage.

        This method is a generator.

        The input nodes may be unordered. Implementations must ensure that a
        node's parents are emitted before the node itself. Transitively, this
        means that a node may only be emitted once all its ancestors in
        ``nodes`` have also been emitted.

        By default, emits "index" data (the ``node``, ``p1node``, and
        ``p2node`` attributes). If ``revisiondata`` is set, revision data
        will also be present on the emitted objects.

        With default argument values, implementations can choose to emit
        either fulltext revision data or a delta. When emitting deltas,
        implementations must consider whether the delta's base revision
        fulltext is available to the receiver.

        The base revision fulltext is guaranteed to be available if any of
        the following are met:

        * Its fulltext revision was emitted by this method call.
        * A delta for that revision was emitted by this method call.
        * ``assumehaveparentrevisions`` is True and the base revision is a
          parent of the node.

        ``nodesorder`` can be used to control the order that revisions are
        emitted. By default, revisions can be reordered as long as they are
        in DAG topological order (see above). If the value is ``nodes``,
        the iteration order from ``nodes`` should be used. If the value is
        ``storage``, then the native order from the backing storage layer
        is used. (Not all storage layers will have strong ordering and
        behavior of this mode is storage-dependent.) ``nodes`` ordering can
        force revisions to be emitted before their ancestors, so consumers
        should use it with care.

        The ``linknode`` attribute on the returned ``irevisiondelta`` may not
        be set and it is the caller's responsibility to resolve it, if needed.

        If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
        all revision data should be emitted as deltas against the revision
        emitted just prior. The initial revision should be a delta against its
        first parent.
        """

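# A minimal sketch of the relationship between ``read()`` and ``renamed()``
# on an ``ifiledata`` store: ``read()`` strips the metadata header, while
# ``renamed()`` surfaces the copy information that header may carry. The
# helper name is hypothetical; ``fl`` is any conforming object.
def _resolve_copy(fl, node):
    """Return (filedata, copysource) where copysource may be None."""
    data = fl.read(node)  # fulltext with any metadata header stripped
    copied = fl.renamed(node)  # False, or a (path, node) 2-tuple
    return data, (copied if copied else None)
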
class ifilemutation(interfaceutil.Interface):
    """Storage interface for mutation events of a tracked file."""

    def add(filedata, meta, transaction, linkrev, p1, p2):
        """Add a new revision to the store.

        Takes file data, a dictionary of metadata, a transaction, a linkrev,
        and parent nodes.

        Returns the node that was added.

        May no-op if a revision matching the supplied data is already stored.
        """

    def addrevision(
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=0,
        cachedelta=None,
    ):
        """Add a new revision to the store and return its number.

        This is similar to ``add()`` except it operates at a lower level.

        The data passed in already contains a metadata header, if any.

        ``node`` and ``flags`` can be used to define the expected node and
        the flags to use with storage. ``flags`` is a bitwise value composed
        of the various ``REVISION_FLAG_*`` constants.

        ``add()`` is usually called when adding files from e.g. the working
        directory. ``addrevision()`` is often called by ``add()`` and for
        scenarios where revision data has already been computed, such as when
        applying raw data from a peer repo.
        """

    def addgroup(
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
    ):
        """Process a series of deltas for storage.

        ``deltas`` is an iterable of 7-tuples of
        (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
        to add.

        The ``delta`` field contains ``mpatch`` data to apply to a base
        revision, identified by ``deltabase``. The base node can be
        ``nullid``, in which case the header from the delta can be ignored
        and the delta used as the fulltext.

        ``alwayscache`` instructs the lower layers to cache the content of the
        newly added revision, even if it needs to be explicitly computed.
        This used to be the default when ``addrevisioncb`` was provided up to
        Mercurial 5.8.

        ``addrevisioncb`` should be called for each new rev as it is committed.
        ``duplicaterevisioncb`` should be called for all revs with a
        pre-existing node.

        ``maybemissingparents`` is a bool indicating whether the incoming
        data may reference parents/ancestor revisions that aren't present.
        This flag is set when receiving data into a "shallow" store that
        doesn't hold all history.

        Returns a list of nodes that were processed. A node will be in the list
        even if it existed in the store previously.
        """

    def censorrevision(tr, node, tombstone=b''):
        """Remove the content of a single revision.

        The specified ``node`` will have its content purged from storage.
        Future attempts to access the revision data for this node will
        result in failure.

        A ``tombstone`` message can optionally be stored. This message may be
        displayed to users when they attempt to access the missing revision
        data.

        Storage backends may have stored deltas against the previous content
        in this revision. As part of censoring a revision, these storage
        backends are expected to rewrite any internally stored deltas such
        that they no longer reference the deleted content.
        """

    def getstrippoint(minlink):
        """Find the minimum revision that must be stripped to strip a linkrev.

        Returns a 2-tuple containing the minimum revision number and a set
        of all revision numbers that would be broken by this strip.

        TODO this is highly revlog centric and should be abstracted into
        a higher-level deletion API. ``repair.strip()`` relies on this.
        """

    def strip(minlink, transaction):
        """Remove storage of items starting at a linkrev.

        This uses ``getstrippoint()`` to determine the first node to remove.
        Then it effectively truncates storage for all revisions after that.

        TODO this is highly revlog centric and should be abstracted into a
        higher-level deletion API.
        """

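# A minimal sketch of handing a group of deltas to ``addgroup()``. Real
# callers receive the deltas from the wire protocol rather than building
# them by hand; the helper name is hypothetical.
def _store_incoming(store, deltas, linkmapper, tr):
    """Store deltas and return the list of nodes that were processed.

    ``deltas`` is an iterable of the 7-tuples documented above:
    (node, p1, p2, linknode, deltabase, delta, flags), where a
    ``deltabase`` of ``nullid`` means ``delta`` is really a fulltext.
    """
    return store.addgroup(deltas, linkmapper, tr)
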
class ifilestorage(ifileindex, ifiledata, ifilemutation):
    """Complete storage interface for a single tracked file."""

    def files():
        """Obtain paths that are backing storage for this file.

        TODO this is used heavily by verify code and there should probably
        be a better API for that.
        """

    def storageinfo(
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        """Obtain information about storage for this file's data.

        Returns a dict describing storage for this tracked path. The keys
        in the dict map to arguments of the same name. The arguments are
        bools indicating whether to calculate and obtain that data.

        exclusivefiles
            Iterable of (vfs, path) describing files that are exclusively
            used to back storage for this tracked path.

        sharedfiles
            Iterable of (vfs, path) describing files that are used to back
            storage for this tracked path. Those files may also provide
            storage for other stored entities.

        revisionscount
            Number of revisions available for retrieval.

        trackedsize
            Total size in bytes of all tracked revisions. This is a sum of the
            length of the fulltext of all revisions.

        storedsize
            Total size in bytes used to store data for all tracked revisions.
            This is commonly less than ``trackedsize`` due to internal usage
            of deltas rather than fulltext revisions.

        Not all storage backends may support all queries or have a reasonable
        value to use. In that case, the value should be set to ``None`` and
        callers are expected to handle this special value.
        """

    def verifyintegrity(state):
        """Verifies the integrity of file storage.

        ``state`` is a dict holding state of the verifier process. It can be
        used to communicate data between invocations of multiple storage
        primitives.

        If individual revisions cannot have their revision content resolved,
        the method is expected to set the ``skipread`` key to a set of nodes
        that encountered problems. If set, the method can also add the node(s)
        to ``safe_renamed`` in order to indicate nodes that may perform the
        rename checks with currently accessible data.

        The method yields objects conforming to the ``iverifyproblem``
        interface.
        """

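# A minimal sketch of querying ``storageinfo()`` defensively: any requested
# field may come back as ``None`` when the backend cannot compute it. The
# bytes dict keys mirror the revlog-based implementation and are an
# assumption here; the helper name is hypothetical.
def _storage_summary(store):
    """Return (revision count, stored size), with unknowns left as None."""
    info = store.storageinfo(revisionscount=True, storedsize=True)
    return info.get(b'revisionscount'), info.get(b'storedsize')
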
class idirs(interfaceutil.Interface):
    """Interface representing a collection of directories from paths.

    This interface is essentially a derived data structure representing
    directories from a collection of paths.
    """

    def addpath(path):
        """Add a path to the collection.

        All directories in the path will be added to the collection.
        """

    def delpath(path):
        """Remove a path from the collection.

        If the removal was the last path in a particular directory, the
        directory is removed from the collection.
        """

    def __iter__():
        """Iterate over the directories in this collection of paths."""

    def __contains__(path):
        """Whether a specific directory is in this collection."""

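# A minimal sketch of consuming an ``idirs`` collection, e.g. one obtained
# from ``imanifestdict.dirs()``. Paths are bytes in Mercurial, and
# ``posixpath`` handles bytes fine; the helper name is hypothetical.
def _has_parent_dir(dirs, path):
    """Check whether the leading directory of ``path`` is in ``dirs``."""
    import posixpath

    return posixpath.dirname(path) in dirs  # uses idirs.__contains__
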
class imanifestdict(interfaceutil.Interface):
    """Interface representing a manifest data structure.

    A manifest is effectively a dict mapping paths to entries. Each entry
    consists of a binary node and extra flags affecting that entry.
    """

    def __getitem__(path):
        """Returns the binary node value for a path in the manifest.

        Raises ``KeyError`` if the path does not exist in the manifest.

        Equivalent to ``self.find(path)[0]``.
        """

    def find(path):
        """Returns the entry for a path in the manifest.

        Returns a 2-tuple of (node, flags).

        Raises ``KeyError`` if the path does not exist in the manifest.
        """

    def __len__():
        """Return the number of entries in the manifest."""

    def __nonzero__():
        """Returns True if the manifest has entries, False otherwise."""

    __bool__ = __nonzero__

    def __setitem__(path, node):
        """Define the node value for a path in the manifest.

        If the path is already in the manifest, its flags will be copied to
        the new entry.
        """

    def __contains__(path):
        """Whether a path exists in the manifest."""

    def __delitem__(path):
        """Remove a path from the manifest.

        Raises ``KeyError`` if the path is not in the manifest.
        """

    def __iter__():
        """Iterate over paths in the manifest."""

    def iterkeys():
        """Iterate over paths in the manifest."""

    def keys():
        """Obtain a list of paths in the manifest."""

    def filesnotin(other, match=None):
        """Obtain the set of paths in this manifest but not in another.

        ``match`` is an optional matcher function to be applied to both
        manifests.

        Returns a set of paths.
        """

    def dirs():
        """Returns an object implementing the ``idirs`` interface."""

    def hasdir(dir):
        """Returns a bool indicating if a directory is in this manifest."""

    def walk(match):
        """Generator of paths in manifest satisfying a matcher.

        If the matcher has explicit files listed and they don't exist in
        the manifest, ``match.bad()`` is called for each missing file.
        """

    def diff(other, match=None, clean=False):
        """Find differences between this manifest and another.

        This manifest is compared to ``other``.

        If ``match`` is provided, the two manifests are filtered against this
        matcher and only entries satisfying the matcher are compared.

        If ``clean`` is True, unchanged files are included in the returned
        object.

        Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
        the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
        represents the node and flags for this manifest and ``(node2, flag2)``
        are the same for the other manifest.
        """

    def setflag(path, flag):
        """Set the flag value for a given path.

        Raises ``KeyError`` if the path is not already in the manifest.
        """

    def get(path, default=None):
        """Obtain the node value for a path or a default value if missing."""

    def flags(path):
        """Return the flags value for a path (default: empty bytestring)."""

    def copy():
        """Return a copy of this manifest."""

    def items():
        """Returns an iterable of (path, node) for items in this manifest."""

    def iteritems():
        """Identical to items()."""

    def iterentries():
        """Returns an iterable of (path, node, flags) for this manifest.

        Similar to ``iteritems()`` except items are a 3-tuple and include
        flags.
        """

    def text():
        """Obtain the raw data representation for this manifest.

        Result is used to create a manifest revision.
        """

    def fastdelta(base, changes):
        """Obtain a delta between this manifest and another given changes.

        ``base`` is the raw data representation of another manifest.

        ``changes`` is an iterable of ``(path, to_delete)``.

        Returns a 2-tuple containing ``bytearray(self.text())`` and the
        delta between ``base`` and this manifest.

        If this manifest implementation can't support ``fastdelta()``,
        raise ``mercurial.manifest.FastdeltaUnavailable``.
        """

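# A minimal sketch of unpacking the nested 2-tuples that ``diff()``
# returns. That entries missing on one side carry a ``None`` node mirrors
# the revlog-based manifestdict and is an assumption here; ``m1`` and
# ``m2`` are any conforming manifests, and the helper name is hypothetical.
def _classify_diff(m1, m2):
    """Yield (path, kind) pairs describing how each differing path differs."""
    for path, ((n1, fl1), (n2, fl2)) in m1.diff(m2).items():
        if n1 is None:
            yield path, b'only-in-other'   # absent from m1
        elif n2 is None:
            yield path, b'only-in-self'    # absent from m2
        else:
            yield path, b'modified'        # node or flags differ
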
class imanifestrevisionbase(interfaceutil.Interface):
    """Base interface representing a single revision of a manifest.

    Should not be used as a primary interface: should always be inherited
    as part of a larger interface.
    """

    def copy():
        """Obtain a copy of this manifest instance.

        Returns an object conforming to the ``imanifestrevisionwritable``
        interface. The instance will be associated with the same
        ``imanifestlog`` collection as this instance.
        """

    def read():
        """Obtain the parsed manifest data structure.

        The returned object conforms to the ``imanifestdict`` interface.
        """

class imanifestrevisionstored(imanifestrevisionbase):
    """Interface representing a manifest revision committed to storage."""

    def node():
        """The binary node for this manifest."""

    parents = interfaceutil.Attribute(
        """List of binary nodes that are parents for this manifest revision."""
    )

    def readdelta(shallow=False):
        """Obtain the manifest data structure representing changes from parent.

        This manifest is compared to its first parent. A new manifest
        representing those differences is constructed.

        The returned object conforms to the ``imanifestdict`` interface.
        """

    def readfast(shallow=False):
        """Calls either ``read()`` or ``readdelta()``.

        The faster of the two options is called.
        """

    def find(key):
        """Calls ``self.read().find(key)``.

        Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
        """

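# A minimal sketch of a consumer that only needs the paths a stored
# manifest revision touched relative to its first parent, so the cheap
# ``readdelta()`` path suffices. Iterating the returned ``imanifestdict``
# yields paths, per its ``__iter__`` contract; the helper is hypothetical.
def _touched_paths(mfrev):
    """Return the paths changed by ``mfrev`` versus its first parent."""
    return list(mfrev.readdelta())
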
class imanifestrevisionwritable(imanifestrevisionbase):
    """Interface representing a manifest revision that can be committed."""

    def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
        """Add this revision to storage.

        Takes a transaction object, the changeset revision number it will
        be associated with, its parent nodes, and lists of added and
        removed paths.

        If match is provided, storage can choose not to inspect or write out
        items that do not match. Storage is still required to be able to
        provide the full manifest in the future for any directories written
        (these manifests should not be "narrowed on disk").

        Returns the binary node of the created revision.
        """

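# A minimal sketch of committing a writable manifest revision. All names
# are hypothetical; ``tr`` is assumed to be an active transaction, and
# ``added``/``removed`` are plain lists of paths.
def _commit_manifest(m, tr, linkrev, p1node, p2node, added, removed):
    """Write ``m`` to storage and return the binary node of the revision."""
    return m.write(tr, linkrev, p1node, p2node, added, removed)
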
class imanifeststorage(interfaceutil.Interface):
    """Storage interface for manifest data."""

    nodeconstants = interfaceutil.Attribute(
        """nodeconstants used by the current repository."""
    )

    tree = interfaceutil.Attribute(
        """The path to the directory this manifest tracks.

        The empty bytestring represents the root manifest.
        """
    )

    index = interfaceutil.Attribute(
        """An ``ifilerevisionssequence`` instance."""
    )

    opener = interfaceutil.Attribute(
        """VFS opener to use to access underlying files used for storage.

        TODO this is revlog specific and should not be exposed.
        """
    )

    _generaldelta = interfaceutil.Attribute(
        """Whether generaldelta storage is being used.

        TODO this is revlog specific and should not be exposed.
        """
    )

    fulltextcache = interfaceutil.Attribute(
        """Dict with cache of fulltexts.

        TODO this doesn't feel appropriate for the storage interface.
        """
    )

    def __len__():
        """Obtain the number of revisions stored for this manifest."""

    def __iter__():
        """Iterate over revision numbers for this manifest."""

    def rev(node):
        """Obtain the revision number given a binary node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def node(rev):
        """Obtain the node value given a revision number.

        Raises ``error.LookupError`` if the revision is not known.
        """

    def lookup(value):
        """Attempt to resolve a value to a node.

        Value can be a binary node, hex node, revision number, or a bytes
        that can be converted to an integer.

        Raises ``error.LookupError`` if a node could not be resolved.
        """

    def parents(node):
        """Returns a 2-tuple of parent nodes for a node.

        Values will be ``nullid`` if the parent is empty.
        """

    def parentrevs(rev):
        """Like parents() but operates on revision numbers."""

    def linkrev(rev):
        """Obtain the changeset revision number a revision is linked to."""

    def revision(node, _df=None):
        """Obtain fulltext data for a node."""

    def rawdata(node, _df=None):
        """Obtain raw data for a node."""

    def revdiff(rev1, rev2):
        """Obtain a delta between two revision numbers.

        The returned data is the result of ``bdiff.bdiff()`` on the raw
        revision data.
        """

    def cmp(node, fulltext):
        """Compare fulltext to another revision.

        Returns True if the fulltext is different from what is stored.
        """

    def emitrevisions(
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
    ):
        """Produce ``irevisiondelta`` describing revisions.

        See the documentation for ``ifiledata`` for more.
        """

    def addgroup(
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """Process a series of deltas for storage.

        See the documentation in ``ifilemutation`` for more.
        """

    def rawsize(rev):
        """Obtain the size of tracked data.

        Is equivalent to ``len(m.rawdata(node))``.

        TODO this method is only used by upgrade code and may be removed.
        """

    def getstrippoint(minlink):
        """Find the minimum revision that must be stripped to strip a linkrev.

        See the documentation in ``ifilemutation`` for more.
        """

    def strip(minlink, transaction):
        """Remove storage of items starting at a linkrev.

        See the documentation in ``ifilemutation`` for more.
        """

    def checksize():
        """Obtain the expected sizes of backing files.

        TODO this is used by verify and it should not be part of the interface.
        """

    def files():
        """Obtain paths that are backing storage for this manifest.

        TODO this is used by verify and there should probably be a better API
        for this functionality.
        """

    def deltaparent(rev):
        """Obtain the revision that a revision is delta'd against.

        TODO delta encoding is an implementation detail of storage and should
        not be exposed to the storage interface.
        """

    def clone(tr, dest, **kwargs):
        """Clone this instance to another."""

    def clearcaches(clear_persisted_data=False):
        """Clear any caches associated with this instance."""

    def dirlog(d):
        """Obtain a manifest storage instance for a tree."""

    def add(
        m, transaction, link, p1, p2, added, removed, readtree=None, match=None
    ):
        """Add a revision to storage.

        ``m`` is an object conforming to ``imanifestdict``.

        ``link`` is the linkrev revision number.

        ``p1`` and ``p2`` are the parent revision numbers.

        ``added`` and ``removed`` are iterables of added and removed paths,
        respectively.

        ``readtree`` is a function that can be used to read the child tree(s)
        when recursively writing the full tree structure when using
        tree manifests.

        ``match`` is a matcher that can be used to hint to storage that not all
        paths must be inspected; this is an optimization and can be safely
        ignored. Note that the storage must still be able to reproduce a full
        manifest including files that did not match.
        """

    def storageinfo(
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        """Obtain information about storage for this manifest's data.

        See ``ifilestorage.storageinfo()`` for a description of this method.
        This one behaves the same way, except for manifest data.
        """

    def get_revlog():
        """Return the actual revlog instance, if any.

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving simple
        access to the revlog instance helps such code.
        """

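# A minimal sketch of why ``get_revlog()`` (added by this change) exists:
# optimization code can reach the underlying revlog when there is one, and
# fall back gracefully for non-revlog backends. ``deltaparent()`` is a real
# revlog method; the helper name and the None fallback are assumptions.
def _delta_parent_if_revlog(store, rev):
    """Return the delta parent of ``rev``, or None without revlog internals."""
    revlog = store.get_revlog()
    if revlog is None:  # non-revlog storage: delta parents are not a concept
        return None
    return revlog.deltaparent(rev)
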
class imanifestlog(interfaceutil.Interface):
    """Interface representing a collection of manifest snapshots.

    Represents the root manifest in a repository.

    Also serves as a means to access nested tree manifests and to cache
    tree manifests.
    """

    nodeconstants = interfaceutil.Attribute(
        """nodeconstants used by the current repository."""
    )

    def __getitem__(node):
        """Obtain a manifest instance for a given binary node.

        Equivalent to calling ``self.get('', node)``.

        The returned object conforms to the ``imanifestrevisionstored``
        interface.
        """

    def get(tree, node, verify=True):
        """Retrieve the manifest instance for a given directory and binary node.

        ``node`` always refers to the node of the root manifest (which will be
        the only manifest if flat manifests are being used).

        If ``tree`` is the empty string, the root manifest is returned.
        Otherwise the manifest for the specified directory will be returned
        (requires tree manifests).

        If ``verify`` is True, ``LookupError`` is raised if the node is not
        known.

        The returned object conforms to the ``imanifestrevisionstored``
        interface.
        """

    def getstorage(tree):
        """Retrieve an interface to storage for a particular tree.

        If ``tree`` is the empty bytestring, storage for the root manifest will
        be returned. Otherwise storage for a tree manifest is returned.

        TODO formalize interface for returned object.
        """

    def clearcaches():
        """Clear caches associated with this collection."""

    def rev(node):
        """Obtain the revision number for a binary node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def update_caches(transaction):
        """Update whatever caches are relevant for the used storage."""

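# A minimal sketch of resolving a subdirectory manifest through the
# manifest log: the node of the root manifest plus a tree path yields the
# parsed manifestdict for that subtree. ``mlog`` is any conforming object;
# the helper name is hypothetical.
def _subtree(mlog, rootnode, tree=b''):
    """Return the imanifestdict for ``tree`` at the given root node."""
    mfrev = mlog.get(tree, rootnode)  # b'' selects the root manifest
    return mfrev.read()
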
class ilocalrepositoryfilestorage(interfaceutil.Interface):
    """Local repository sub-interface providing access to tracked file storage.

    This interface defines how a repository accesses storage for a single
    tracked file path.
    """

    def file(f):
        """Obtain a filelog for a tracked path.

        The returned type conforms to the ``ifilestorage`` interface.
        """

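# A minimal sketch tying the pieces together: each tracked path yields an
# ``ifilestorage`` object, and, per this change, ``get_revlog()`` tells a
# caller whether revlog-specific fast paths are available. The helper name
# is hypothetical; non-revlog backends returning None is an assumption.
def _filelog_is_revlog(repo, path):
    """Return True if storage for ``path`` is backed by a revlog."""
    fl = repo.file(path)  # conforms to ifilestorage
    return fl.get_revlog() is not None
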
class ilocalrepositorymain(interfaceutil.Interface):
    """Main interface for local repositories.

    This currently captures the reality of things - not how things should be.
    """

    nodeconstants = interfaceutil.Attribute(
        """Constant nodes matching the hash function used by the repository."""
    )
    nullid = interfaceutil.Attribute(
        """null revision for the hash function used by the repository."""
    )

    supported = interfaceutil.Attribute(
        """Set of requirements that this repo is capable of opening."""
    )

    requirements = interfaceutil.Attribute(
        """Set of requirements this repo uses."""
    )

    features = interfaceutil.Attribute(
        """Set of "features" this repository supports.

        A "feature" is a loosely-defined term. It can refer to a feature
        in the classical sense or can describe an implementation detail
        of the repository. For example, a ``readonly`` feature may denote
        the repository as read-only. Or a ``revlogfilestore`` feature may
        denote that the repository is using revlogs for file storage.

        The intent of features is to provide a machine-queryable mechanism
        for repo consumers to test for various repository characteristics.

        Features are similar to ``requirements``. The main difference is that
        requirements are stored on-disk and represent what is needed to open
        the repository, while features describe finer-grained run-time
        capabilities of the repository (and may be derived from requirements).
        """
    )
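
    # Illustrative note, not in the original source: a consumer would gate
    # behaviour on ``features`` rather than ``requirements``, e.g. (assuming
    # a feature constant defined elsewhere in this module):
    #
    #     if repository.REPO_FEATURE_STREAM_CLONE in repo.features:
    #         ...  # serve a stream clone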

    filtername = interfaceutil.Attribute(
        """Name of the repoview that is active on this repo."""
    )

    vfs_map = interfaceutil.Attribute(
        """a bytes-key → vfs mapping used by transaction and others"""
    )

    wvfs = interfaceutil.Attribute(
        """VFS used to access the working directory."""
    )

    vfs = interfaceutil.Attribute(
        """VFS rooted at the .hg directory.

        Used to access repository data not in the store.
        """
    )

    svfs = interfaceutil.Attribute(
        """VFS rooted at the store.

        Used to access repository data in the store. Typically .hg/store.
        But can point elsewhere if the store is shared.
        """
    )

    root = interfaceutil.Attribute(
        """Path to the root of the working directory."""
    )

    path = interfaceutil.Attribute("""Path to the .hg directory.""")

    origroot = interfaceutil.Attribute(
        """The filesystem path that was used to construct the repo."""
    )

    auditor = interfaceutil.Attribute(
        """A pathauditor for the working directory.

        This checks if a path refers to a nested repository.

        Operates on the filesystem.
        """
    )

    nofsauditor = interfaceutil.Attribute(
        """A pathauditor for the working directory.

        This is like ``auditor`` except it doesn't do filesystem checks.
        """
    )

    baseui = interfaceutil.Attribute(
        """Original ui instance passed into constructor."""
    )

    ui = interfaceutil.Attribute("""Main ui instance for this instance.""")

    sharedpath = interfaceutil.Attribute(
        """Path to the .hg directory of the repo this repo was shared from."""
    )

    store = interfaceutil.Attribute("""A store instance.""")

    spath = interfaceutil.Attribute("""Path to the store.""")

    sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")

    cachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory.

        Typically .hg/cache.
        """
    )

    wcachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory dedicated to working copy

        Typically .hg/wcache.
        """
    )

    filteredrevcache = interfaceutil.Attribute(
        """Holds sets of revisions to be filtered."""
    )

    names = interfaceutil.Attribute("""A ``namespaces`` instance.""")

    filecopiesmode = interfaceutil.Attribute(
        """The way file copies should be dealt with in this repo."""
    )

    def close():
        """Close the handle on this repository."""

    def peer(path=None):
        """Obtain an object conforming to the ``peer`` interface."""

    def unfiltered():
        """Obtain an unfiltered/raw view of this repo."""

    def filtered(name, visibilityexceptions=None):
        """Obtain a named view of this repository."""

    obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")

    changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")

    manifestlog = interfaceutil.Attribute(
        """An instance conforming to the ``imanifestlog`` interface.

        Provides access to manifests for the repository.
        """
    )

    dirstate = interfaceutil.Attribute("""Working directory state.""")

    narrowpats = interfaceutil.Attribute(
        """Matcher patterns for this repository's narrowspec."""
    )

    def narrowmatch(match=None, includeexact=False):
        """Obtain a matcher for the narrowspec."""

    def setnarrowpats(newincludes, newexcludes):
        """Define the narrowspec for this repository."""

    def __getitem__(changeid):
        """Try to resolve a changectx."""

    def __contains__(changeid):
        """Whether a changeset exists."""

    def __nonzero__():
        """Always returns True."""
        return True

    __bool__ = __nonzero__

    def __len__():
        """Returns the number of changesets in the repo."""

    def __iter__():
        """Iterate over revisions in the changelog."""

    def revs(expr, *args):
        """Evaluate a revset.

        Emits revisions.
        """

    def set(expr, *args):
        """Evaluate a revset.

        Emits changectx instances.
        """

    def anyrevs(specs, user=False, localalias=None):
        """Find revisions matching one of the given revsets."""

    def url():
        """Returns a string representing the location of this repo."""

    def hook(name, throw=False, **args):
        """Call a hook."""

    def tags():
        """Return a mapping of tag to node."""

    def tagtype(tagname):
        """Return the type of a given tag."""

    def tagslist():
        """Return a list of tags ordered by revision."""

    def nodetags(node):
        """Return the tags associated with a node."""

    def nodebookmarks(node):
        """Return the list of bookmarks pointing to the specified node."""

    def branchmap():
        """Return a mapping of branch to heads in that branch."""

    def revbranchcache():
        pass

    def register_changeset(rev, changelogrevision):
        """Extension point for caches for new nodes.

        Multiple consumers are expected to need parts of the
        changelogrevision, so it is provided as an optimization to avoid
        duplicate lookups. A simple cache would be fragile when other
        revisions are accessed, too."""
        pass

    def branchtip(branchtip, ignoremissing=False):
        """Return the tip node for a given branch."""

    def lookup(key):
        """Resolve the node for a revision."""

    def lookupbranch(key):
        """Look up the branch name of the given revision or branch name."""

    def known(nodes):
        """Determine whether a series of nodes is known.

        Returns a list of bools.
        """

    def local():
        """Whether the repository is local."""
        return True

    def publishing():
        """Whether the repository is a publishing repository."""

    def cancopy():
        pass

    def shared():
        """The type of shared repository or None."""

    def wjoin(f, *insidef):
        """Calls self.vfs.reljoin(self.root, f, *insidef)"""

    def setparents(p1, p2):
        """Set the parent nodes of the working directory."""

    def filectx(path, changeid=None, fileid=None):
        """Obtain a filectx for the given file revision."""

    def getcwd():
        """Obtain the current working directory from the dirstate."""

    def pathto(f, cwd=None):
        """Obtain the relative path to a file."""

    def adddatafilter(name, fltr):
        pass

    def wread(filename):
        """Read a file from wvfs, using data filters."""

    def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
        """Write data to a file in the wvfs, using data filters."""

    def wwritedata(filename, data):
        """Resolve data for writing to the wvfs, using data filters."""

    def currenttransaction():
        """Obtain the current transaction instance or None."""

    def transaction(desc, report=None):
        """Open a new transaction to write to the repository."""

    def undofiles():
        """Returns a list of (vfs, path) for files to undo transactions."""

    def recover():
        """Roll back an interrupted transaction."""

    def rollback(dryrun=False, force=False):
        """Undo the last transaction.

        DANGEROUS.
        """

    def updatecaches(tr=None, full=False, caches=None):
        """Warm repo caches."""

    def invalidatecaches():
        """Invalidate cached data due to the repository mutating."""

    def invalidatevolatilesets():
        pass

    def invalidatedirstate():
        """Invalidate the dirstate."""

    def invalidate(clearfilecache=False):
        pass

    def invalidateall():
        pass

    def lock(wait=True):
        """Lock the repository store and return a lock instance."""

    def currentlock():
        """Return the lock if it's held or None."""

    def wlock(wait=True):
        """Lock the non-store parts of the repository."""

    def currentwlock():
        """Return the wlock if it's held or None."""

    def checkcommitpatterns(wctx, match, status, fail):
        pass

    def commit(
        text=b'',
        user=None,
        date=None,
        match=None,
        force=False,
        editor=False,
        extra=None,
    ):
        """Add a new revision to the repository."""

    def commitctx(ctx, error=False, origctx=None):
        """Commit a commitctx instance to the repository."""

    def destroying():
        """Inform the repository that nodes are about to be destroyed."""

    def destroyed():
        """Inform the repository that nodes have been destroyed."""

    def status(
        node1=b'.',
        node2=None,
        match=None,
        ignored=False,
        clean=False,
        unknown=False,
        listsubrepos=False,
    ):
        """Convenience method to call repo[x].status()."""

    def addpostdsstatus(ps):
        pass

    def postdsstatus():
        pass

    def clearpostdsstatus():
        pass

    def heads(start=None):
        """Obtain list of nodes that are DAG heads."""

    def branchheads(branch=None, start=None, closed=False):
        pass

    def branches(nodes):
        pass

    def between(pairs):
        pass

    def checkpush(pushop):
        pass

    prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")

    def pushkey(namespace, key, old, new):
        pass

    def listkeys(namespace):
        pass

    def debugwireargs(one, two, three=None, four=None, five=None):
        pass

    def savecommitmessage(text):
        pass

    def register_sidedata_computer(
        kind, category, keys, computer, flags, replace=False
    ):
        pass

    def register_wanted_sidedata(category):
        pass


class completelocalrepository(
    ilocalrepositorymain, ilocalrepositoryfilestorage
):
    """Complete interface for a local repository."""


class iwireprotocolcommandcacher(interfaceutil.Interface):
    """Represents a caching backend for wire protocol commands.

    Wire protocol version 2 supports transparent caching of many commands.
    To leverage this caching, servers can activate objects that cache
    command responses. Objects handle both cache writing and reading.
    This interface defines how that response caching mechanism works.

    Wire protocol version 2 commands emit a series of objects that are
    serialized and sent to the client. The caching layer exists between
    the invocation of the command function and the sending of its output
    objects to an output layer.

    Instances of this interface represent a binding to a cache that
    can serve a response (in place of calling a command function) and/or
    write responses to a cache for subsequent use.

    When a command request arrives, the following happens with regard
    to this interface:

    1. The server determines whether the command request is cacheable.
    2. If it is, an instance of this interface is spawned.
    3. The cacher is activated in a context manager (``__enter__`` is called).
    4. A cache *key* for that request is derived. This will call the
       instance's ``adjustcachekeystate()`` method so the derivation
       can be influenced.
    5. The cacher is informed of the derived cache key via a call to
       ``setcachekey()``.
    6. The cacher's ``lookup()`` method is called to test for presence of
       the derived key in the cache.
    7. If ``lookup()`` returns a hit, that cached result is used in place
       of invoking the command function. ``__exit__`` is called and the
       instance is discarded.
    8. Otherwise, the command function is invoked.
    9. ``onobject()`` is called for each object emitted by the command
       function.
    10. After the final object is seen, ``onfinished()`` is called.
    11. ``__exit__`` is called to signal the end of use of the instance.

    Cache *key* derivation can be influenced by the instance.

    Cache keys are initially derived by a deterministic representation of
    the command request. This includes the command name, arguments, protocol
    version, etc. This initial key derivation is performed by CBOR-encoding a
    data structure and feeding that output into a hasher.

    Instances of this interface can influence this initial key derivation
    via ``adjustcachekeystate()``.

    The instance is informed of the derived cache key via a call to
    ``setcachekey()``. The instance must store the key locally so it can
    be consulted on subsequent operations that may require it.

    When constructed, the instance has access to a callable that can be used
    for encoding response objects. This callable receives as its single
    argument an object emitted by a command function. It returns an iterable
    of bytes chunks representing the encoded object. Unless the cacher is
    caching native Python objects in memory or has a way of reconstructing
    the original Python objects, implementations typically call this function
    to produce bytes from the output objects and then store those bytes in
    the cache. When it comes time to re-emit those bytes, they are wrapped
    in a ``wireprototypes.encodedresponse`` instance to tell the output
    layer that they are pre-encoded.

    When receiving the objects emitted by the command function, instances
    can choose what to do with those objects. The simplest thing to do is
    re-emit the original objects. They will be forwarded to the output
    layer and will be processed as if the cacher did not exist.

    Implementations could also choose to not emit objects - instead locally
    buffering objects or their encoded representation. They could then emit
    a single "coalesced" object when ``onfinished()`` is called. In
    this way, the implementation would function as a filtering layer of
    sorts.

    When caching objects, typically the encoded form of the object will
    be stored. Keep in mind that if the original object is forwarded to
    the output layer, it will need to be encoded there as well. For large
    output, this redundant encoding could add overhead. Implementations
    could wrap the encoded object data in ``wireprototypes.encodedresponse``
    instances to avoid this overhead.

    One possible shape of a minimal implementation is sketched after this
    interface definition.
    """

    def __enter__():
        """Marks the instance as active.

        Should return self.
        """

    def __exit__(exctype, excvalue, exctb):
        """Called when the cacher is no longer used.

        This can be used by implementations to perform cleanup actions (e.g.
        disconnecting network sockets, aborting a partially cached response).
        """

    def adjustcachekeystate(state):
        """Influences cache key derivation by adjusting state to derive key.

        A dict defining the state used to derive the cache key is passed.

        Implementations can modify this dict to record additional state that
        should influence key derivation.

        Implementations are *highly* encouraged to not modify or delete
        existing keys.
        """

    def setcachekey(key):
        """Record the derived cache key for this request.

        Instances may mutate the key for internal usage, as desired. e.g.
        instances may wish to prepend the repo name, introduce path
        components for filesystem or URL addressing, etc. Behavior is up to
        the cache.

        Returns a bool indicating if the request is cacheable by this
        instance.
        """

    def lookup():
        """Attempt to resolve an entry in the cache.

        The instance is instructed to look for the cache key that it was
        informed about via the call to ``setcachekey()``.

        If there's no cache hit or the cacher doesn't wish to use the cached
        entry, ``None`` should be returned.

        Else, a dict defining the cached result should be returned. The
        dict may have the following keys:

        objs
           An iterable of objects that should be sent to the client. That
           iterable of objects is expected to be what the command function
           would return if invoked or an equivalent representation thereof.
        """

    def onobject(obj):
        """Called when a new object is emitted from the command function.

        Receives as its argument the object that was emitted from the
        command function.

        This method returns an iterator of objects to forward to the output
        layer. The easiest implementation is a generator that just does
        ``yield obj``.
        """

    def onfinished():
        """Called after all objects have been emitted from the command function.

        Implementations should return an iterator of objects to forward to
        the output layer.

        This method can be a generator.
        """
@@ -1,2374 +1,2383 @@
# manifest.py - manifest revision class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import heapq
import itertools
import struct
import weakref

from .i18n import _
from .node import (
    bin,
    hex,
    nullrev,
)
from .pycompat import getattr
from . import (
    encoding,
    error,
    match as matchmod,
    mdiff,
    pathutil,
    policy,
    pycompat,
    revlog,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    constants as revlog_constants,
)

parsers = policy.importmod('parsers')
propertycache = util.propertycache

# Allow tests to more easily test the alternate path in manifestdict.fastdelta()
FASTDELTA_TEXTDIFF_THRESHOLD = 1000


def _parse(nodelen, data):
    # This method does a little bit of excessive-looking
    # precondition checking. This is so that the behavior of this
    # class exactly matches its C counterpart to try and help
    # prevent surprise breakage for anyone that develops against
    # the pure version.
    if data and data[-1:] != b'\n':
        raise ValueError(b'Manifest did not end in a newline.')
    prev = None
    for l in data.splitlines():
        if prev is not None and prev > l:
            raise ValueError(b'Manifest lines not in sorted order.')
        prev = l
        f, n = l.split(b'\0')
        nl = len(n)
        flags = n[-1:]
        if flags in _manifestflags:
            n = n[:-1]
            nl -= 1
        else:
            flags = b''
        if nl != 2 * nodelen:
            raise ValueError(b'Invalid manifest line')

        yield f, bin(n), flags


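# Illustrative sketch, not part of the original module: round-tripping a tiny
# two-entry manifest through ``_parse`` and ``_text`` (defined below). The
# node value is a made-up 20-byte hash used purely for demonstration.
def _demo_parse_roundtrip():
    node = b'\x11' * 20
    text = b"bar/baz.py\x00%s\nfoo.py\x00%sx\n" % (hex(node), hex(node))
    entries = list(_parse(20, text))
    # Each entry is a (filename, binary node, flags) tuple; ``x`` marks an
    # executable file and the empty flag string is the common case.
    assert entries == [(b'bar/baz.py', node, b''), (b'foo.py', node, b'x')]
    assert _text(iter(entries)) == text

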
def _text(it):
    files = []
    lines = []
    for f, n, fl in it:
        files.append(f)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))

    _checkforbidden(files)
    return b''.join(lines)


class lazymanifestiter:
    def __init__(self, lm):
        self.pos = 0
        self.lm = lm

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            self.pos += 1
            return data[0]
        self.pos += 1
        zeropos = data.find(b'\x00', pos)
        return data[pos:zeropos]

    __next__ = next


class lazymanifestiterentries:
    def __init__(self, lm):
        self.lm = lm
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            self.pos += 1
            return data
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', pos)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen = nlpos - zeropos - 2
        else:
            hlen = nlpos - zeropos - 1
            flags = b''
        if hlen != 2 * self.lm._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(
            data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
        )
        self.pos += 1
        return (data[pos:zeropos], hashval, flags)

    __next__ = next


def unhexlify(data, extra, pos, length):
    s = bin(data[pos : pos + length])
    if extra:
        s += chr(extra & 0xFF)
    return s


def _cmp(a, b):
    return (a > b) - (a < b)


_manifestflags = {b'', b'l', b't', b'x'}


class _lazymanifest:
    """A pure python manifest backed by a byte string. It is supplemented with
    internal lists as it is modified, until it is compacted back to a pure byte
    string.

    ``data`` is the initial manifest data.

    ``positions`` is a list of offsets, one per manifest entry. Positive
    values are offsets into ``data``, negative values are offsets into the
    ``extradata`` list. When an entry is removed, its entry is dropped from
    ``positions``. The values are encoded such that when walking the list and
    indexing into ``data`` or ``extradata`` as appropriate, the entries are
    sorted by filename.

    ``extradata`` is a list of (key, hash, flags) for entries that were added
    or modified since the manifest was created or compacted.
    """

    def __init__(
        self,
        nodelen,
        data,
        positions=None,
        extrainfo=None,
        extradata=None,
        hasremovals=False,
    ):
        self._nodelen = nodelen
        if positions is None:
            self.positions = self.findlines(data)
            self.extrainfo = [0] * len(self.positions)
            self.data = data
            self.extradata = []
            self.hasremovals = False
        else:
            self.positions = positions[:]
            self.extrainfo = extrainfo[:]
            self.extradata = extradata[:]
            self.data = data
            self.hasremovals = hasremovals

    def findlines(self, data):
        if not data:
            return []
        pos = data.find(b"\n")
        if pos == -1 or data[-1:] != b'\n':
            raise ValueError(b"Manifest did not end in a newline.")
        positions = [0]
        prev = data[: data.find(b'\x00')]
        while pos < len(data) - 1 and pos != -1:
            positions.append(pos + 1)
            nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
            if nexts < prev:
                raise ValueError(b"Manifest lines not in sorted order.")
            prev = nexts
            pos = data.find(b"\n", pos + 1)
        return positions

    def _get(self, index):
        # get the position encoded in pos:
        # positive number is an index in 'data'
        # negative number is in extrapieces
        pos = self.positions[index]
        if pos >= 0:
            return self.data, pos
        return self.extradata[-pos - 1], -1

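    # Illustrative note, not in the original source: with
    # ``positions == [0, -1]`` the second entry lives in ``extradata``,
    # because a stored value of -1 decodes to ``extradata[-(-1) - 1]``,
    # i.e. ``extradata[0]``; -2 would decode to ``extradata[1]``, and so on.
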
    def _getkey(self, pos):
        if pos >= 0:
            return self.data[pos : self.data.find(b'\x00', pos + 1)]
        return self.extradata[-pos - 1][0]

    def bsearch(self, key):
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return midpoint
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return -1

    def bsearch2(self, key):
        # same as the above, but will always return the position
        # done for performance reasons
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return (midpoint, True)
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return (first, False)

    def __contains__(self, key):
        return self.bsearch(key) != -1

    def __getitem__(self, key):
        if not isinstance(key, bytes):
            raise TypeError(b"getitem: manifest keys must be a bytes.")
        needle = self.bsearch(key)
        if needle == -1:
            raise KeyError
        data, pos = self._get(needle)
        if pos == -1:
            return (data[1], data[2])
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', zeropos)
        assert 0 <= needle <= len(self.positions)
        assert len(self.extrainfo) == len(self.positions)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        hlen = nlpos - zeropos - 1
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen -= 1
        else:
            flags = b''
        if hlen != 2 * self._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
        return (hashval, flags)

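    # Illustrative note, not in the original source: each line in ``data``
    # has the shape ``<filename>\x00<hex node><optional 1-byte flag>\n``, so
    # ``__getitem__`` slices between the NUL and the newline to recover the
    # ``(node, flags)`` pair.
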
    def __delitem__(self, key):
        needle, found = self.bsearch2(key)
        if not found:
            raise KeyError
        cur = self.positions[needle]
        self.positions = self.positions[:needle] + self.positions[needle + 1 :]
        self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
        if cur >= 0:
            # This does NOT unsort the list as far as the search functions are
            # concerned, as they only examine lines mapped by self.positions.
            self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
            self.hasremovals = True

    def __setitem__(self, key, value):
        if not isinstance(key, bytes):
            raise TypeError(b"setitem: manifest keys must be a byte string.")
        if not isinstance(value, tuple) or len(value) != 2:
            raise TypeError(
                b"Manifest values must be a tuple of (node, flags)."
            )
        hashval = value[0]
        if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
            raise TypeError(b"node must be a 20-byte or 32-byte byte string")
        flags = value[1]
        if not isinstance(flags, bytes) or len(flags) > 1:
            raise TypeError(b"flags must be a 0 or 1 byte string, got %r", flags)
        needle, found = self.bsearch2(key)
        if found:
            # put the item
            pos = self.positions[needle]
            if pos < 0:
                self.extradata[-pos - 1] = (key, hashval, value[1])
            else:
                # don't bother rewriting ``data`` in place; stash the new
                # value in ``extradata`` and point the position at it
                self.extradata.append((key, hashval, value[1]))
                self.positions[needle] = -len(self.extradata)
        else:
            # not found, put it in with extra positions
            self.extradata.append((key, hashval, value[1]))
            self.positions = (
                self.positions[:needle]
                + [-len(self.extradata)]
                + self.positions[needle:]
            )
            self.extrainfo = (
                self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
            )

348 def copy(self):
348 def copy(self):
349 # XXX call _compact like in C?
349 # XXX call _compact like in C?
350 return _lazymanifest(
350 return _lazymanifest(
351 self._nodelen,
351 self._nodelen,
352 self.data,
352 self.data,
353 self.positions,
353 self.positions,
354 self.extrainfo,
354 self.extrainfo,
355 self.extradata,
355 self.extradata,
356 self.hasremovals,
356 self.hasremovals,
357 )
357 )
358
358
359 def _compact(self):
359 def _compact(self):
360 # hopefully not called TOO often
360 # hopefully not called TOO often
361 if len(self.extradata) == 0 and not self.hasremovals:
361 if len(self.extradata) == 0 and not self.hasremovals:
362 return
362 return
363 l = []
363 l = []
364 i = 0
364 i = 0
365 offset = 0
365 offset = 0
366 self.extrainfo = [0] * len(self.positions)
366 self.extrainfo = [0] * len(self.positions)
367 while i < len(self.positions):
367 while i < len(self.positions):
368 if self.positions[i] >= 0:
368 if self.positions[i] >= 0:
369 cur = self.positions[i]
369 cur = self.positions[i]
370 last_cut = cur
370 last_cut = cur
371
371
372 # Collect all contiguous entries in the buffer at the current
372 # Collect all contiguous entries in the buffer at the current
373 # offset, breaking out only for added/modified items held in
373 # offset, breaking out only for added/modified items held in
374 # extradata, or a deleted line prior to the next position.
374 # extradata, or a deleted line prior to the next position.
375 while True:
375 while True:
376 self.positions[i] = offset
376 self.positions[i] = offset
377 i += 1
377 i += 1
378 if i == len(self.positions) or self.positions[i] < 0:
378 if i == len(self.positions) or self.positions[i] < 0:
379 break
379 break
380
380
381 # A removed file has no positions[] entry, but does have an
381 # A removed file has no positions[] entry, but does have an
382 # overwritten first byte. Break out and find the end of the
382 # overwritten first byte. Break out and find the end of the
383 # current good entry/entries if there is a removed file
383 # current good entry/entries if there is a removed file
384 # before the next position.
384 # before the next position.
385 if (
385 if (
386 self.hasremovals
386 self.hasremovals
387 and self.data.find(b'\n\x00', cur, self.positions[i])
387 and self.data.find(b'\n\x00', cur, self.positions[i])
388 != -1
388 != -1
389 ):
389 ):
390 break
390 break
391
391
392 offset += self.positions[i] - cur
392 offset += self.positions[i] - cur
393 cur = self.positions[i]
393 cur = self.positions[i]
394 end_cut = self.data.find(b'\n', cur)
394 end_cut = self.data.find(b'\n', cur)
395 if end_cut != -1:
395 if end_cut != -1:
396 end_cut += 1
396 end_cut += 1
397 offset += end_cut - cur
397 offset += end_cut - cur
398 l.append(self.data[last_cut:end_cut])
398 l.append(self.data[last_cut:end_cut])
399 else:
399 else:
400 while i < len(self.positions) and self.positions[i] < 0:
400 while i < len(self.positions) and self.positions[i] < 0:
401 cur = self.positions[i]
401 cur = self.positions[i]
402 t = self.extradata[-cur - 1]
402 t = self.extradata[-cur - 1]
403 l.append(self._pack(t))
403 l.append(self._pack(t))
404 self.positions[i] = offset
404 self.positions[i] = offset
405 # Hashes are either 20 bytes (old sha1s) or 32
405 # Hashes are either 20 bytes (old sha1s) or 32
406 # bytes (new non-sha1).
406 # bytes (new non-sha1).
407 hlen = 20
407 hlen = 20
408 if len(t[1]) > 25:
408 if len(t[1]) > 25:
409 hlen = 32
409 hlen = 32
410 if len(t[1]) > hlen:
410 if len(t[1]) > hlen:
411 self.extrainfo[i] = ord(t[1][hlen + 1])
411 self.extrainfo[i] = ord(t[1][hlen + 1])
412 offset += len(l[-1])
412 offset += len(l[-1])
413 i += 1
413 i += 1
414 self.data = b''.join(l)
414 self.data = b''.join(l)
415 self.hasremovals = False
415 self.hasremovals = False
416 self.extradata = []
416 self.extradata = []
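
    # A short summary of the encoding handled above: positions[i] >= 0 is a
    # byte offset into self.data, while a negative value -j points at
    # extradata[j - 1], an entry added or modified since the last compaction.
    # _compact() folds extradata back into one flat buffer and resets the
    # bookkeeping.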

    def _pack(self, d):
        n = d[1]
        assert len(n) in (20, 32)
        return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
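
    # Illustrative example of the serialized entry format (hypothetical
    # values; hex() is the bytes-returning helper from the node module):
    #
    #   >>> lm._pack((b'foo/bar.txt', b'\xaa' * 20, b'x'))
    #   b'foo/bar.txt\x00' + b'aa' * 20 + b'x\n'
    #
    # i.e. "<path>\0<hex nodeid><flags>\n" with a 40- or 64-character hex
    # nodeid depending on the hash in use.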

    def text(self):
        self._compact()
        return self.data

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.'''
        # XXX think whether efficiency matters here
        diff = {}

        for fn, e1, flags in self.iterentries():
            if fn not in m2:
                diff[fn] = (e1, flags), (None, b'')
            else:
                e2 = m2[fn]
                if (e1, flags) != e2:
                    diff[fn] = (e1, flags), e2
                elif clean:
                    diff[fn] = None

        for fn, e2, flags in m2.iterentries():
            if fn not in self:
                diff[fn] = (None, b''), (e2, flags)

        return diff
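
    # The returned mapping has this shape (hypothetical nodes n1/n2):
    #
    #   {b'changed.txt': ((n1, b''), (n2, b'')),
    #    b'only-here.txt': ((n1, b''), (None, b'')),
    #    b'only-there.txt': ((None, b''), (n2, b''))}
    #
    # With clean=True, unchanged files additionally map to None.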

    def iterentries(self):
        return lazymanifestiterentries(self)

    def iterkeys(self):
        return lazymanifestiter(self)

    def __iter__(self):
        return lazymanifestiter(self)

    def __len__(self):
        return len(self.positions)

    def filtercopy(self, filterfn):
        # XXX should be optimized
        c = _lazymanifest(self._nodelen, b'')
        for f, n, fl in self.iterentries():
            if filterfn(f):
                c[f] = n, fl
        return c


try:
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    pass


@interfaceutil.implementer(repository.imanifestdict)
class manifestdict:
    def __init__(self, nodelen, data=b''):
        self._nodelen = nodelen
        self._lm = _lazymanifest(nodelen, data)

    def __getitem__(self, key):
        return self._lm[key][0]

    def find(self, key):
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __nonzero__(self):
        # nonzero is covered by the __len__ function, but implementing it here
        # makes it easier for extensions to override.
        return len(self._lm) != 0

    __bool__ = __nonzero__

    def __setitem__(self, key, node):
        self._lm[key] = node, self.flags(key)

    def __contains__(self, key):
        if key is None:
            return False
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return self._lm.__iter__()

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        return list(self.iterkeys())

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match is not None:
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(m2.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in m2}
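
    # Illustrative usage, assuming hypothetical manifests m1 and m2:
    #
    #   >>> m1.filesnotin(m2)
    #   {b'a/only-in-m1.txt'}
    #
    # When a matcher is given, the comparison is limited to the matched
    # files on both sides.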

    @propertycache
    def _dirs(self):
        return pathutil.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        """Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files."""
        files = match.files()
        return len(files) < 100 and (
            match.isexact()
            or (match.prefix() and all(fn in self for fn in files))
        )

    def walk(self, match):
        """Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                if fn in self:
                    yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)
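
    # Sketch of a walk, assuming a hypothetical matcher selecting b'dir/':
    #
    #   >>> sorted(m.walk(match))
    #   [b'dir/a.txt', b'dir/b.txt']
    #
    # Files named by the matcher but absent from the manifest are reported
    # through match.bad() instead of being yielded.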

    def _matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            m = manifestdict(self._nodelen)
            lm = self._lm
            for fn in match.files():
                if fn in lm:
                    m._lm[fn] = lm[fn]
            return m

        m = manifestdict(self._nodelen)
        m._lm = self._lm.filtercopy(match)
        return m

    def diff(self, m2, match=None, clean=False):
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match:
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        return self._lm.diff(m2._lm, clean)
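
    # A matcher narrows both sides before diffing, e.g. (sketch with a
    # hypothetical matcher m over b'dir/'):
    #
    #   >>> m1.diff(m2, match=m, clean=True)
    #
    # only reports files under b'dir/', with unchanged matched files mapped
    # to None because of clean=True.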

    def setflag(self, key, flag):
        if flag not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._lm[key] = self[key], flag

    def get(self, key, default=None):
        try:
            return self._lm[key][0]
        except KeyError:
            return default

    def flags(self, key):
        try:
            return self._lm[key][1]
        except KeyError:
            return b''

    def copy(self):
        c = manifestdict(self._nodelen)
        c._lm = self._lm.copy()
        return c

    def items(self):
        return (x[:2] for x in self._lm.iterentries())

    def iteritems(self):
        return (x[:2] for x in self._lm.iterentries())

    def iterentries(self):
        return self._lm.iterentries()

    def text(self):
        # most likely uses native version
        return self._lm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as a bytearray and a list of changes
        relative to that text, compute a delta that can be used by revlog.
        """
        delta = []
        dstart = None
        dend = None
        dline = [b""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # bs will either be the index of the item or the insert point
                start, end = _msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._lm[f]
                    l = b"%s\0%s%s\n" % (f, hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        raise AssertionError(
                            _(b"failed to remove %s from manifest") % f
                        )
                    l = b""
                if dstart is not None and dstart <= start and dend >= start:
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    if dstart is not None:
                        delta.append([dstart, dend, b"".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, b"".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = _addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = bytearray(self.text())
            deltatext = mdiff.textdiff(
                util.buffer(base), util.buffer(arraytext)
            )

        return arraytext, deltatext
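
    # Sketch of a call; changes is an iterable of (path, todelete) pairs:
    #
    #   >>> arraytext, deltatext = m.fastdelta(base, [(b'a.txt', False)])
    #
    # arraytext is the new full manifest text as a bytearray, deltatext a
    # binary delta usable by revlog.addrevision(). Past
    # FASTDELTA_TEXTDIFF_THRESHOLD the delta is computed from the full texts
    # with mdiff.textdiff() instead.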


def _msearch(m, s, lo=0, hi=None):
    """return a tuple (start, end) that says where to find s within m.

    If the string is found m[start:end] are the line containing
    that string. If start == end the string was not found and
    they indicate the proper sorted insertion point.

    m should be a buffer, a memoryview or a byte string.
    s is a byte string"""

    def advance(i, c):
        while i < lenm and m[i : i + 1] != c:
            i += 1
        return i

    if not s:
        return (lo, lo)
    lenm = len(m)
    if not hi:
        hi = lenm
    while lo < hi:
        mid = (lo + hi) // 2
        start = mid
        while start > 0 and m[start - 1 : start] != b'\n':
            start -= 1
        end = advance(start, b'\0')
        if bytes(m[start:end]) < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = advance(end + 40, b'\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = start
    end = advance(lo, b'\0')
    found = m[lo:end]
    if s == found:
        # we know that after the null there are 40 bytes of sha1
        end = advance(end + 40, b'\n')
        return (lo, end + 1)
    else:
        return (lo, lo)
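
# Illustrative behavior on a minimal two-entry buffer (hypothetical
# 43-byte lines: 1-byte name, NUL, 40 hex digits, newline):
#
#   >>> m = b'a\x00' + b'1' * 40 + b'\n' + b'b\x00' + b'2' * 40 + b'\n'
#   >>> _msearch(m, b'a')
#   (0, 43)
#   >>> _msearch(m, b'c')  # missing: start == end is the insertion point
#   (86, 86)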


def _checkforbidden(l):
    """Check filenames for illegal characters."""
    for f in l:
        if b'\n' in f or b'\r' in f:
            raise error.StorageError(
                _(b"'\\n' and '\\r' disallowed in filenames: %r")
                % pycompat.bytestr(f)
            )


# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    currentposition = 0
    newaddlist = bytearray()

    for start, end, content in x:
        newaddlist += addlist[currentposition:start]
        if content:
            newaddlist += bytearray(content)

        currentposition = end

    newaddlist += addlist[currentposition:]

    deltatext = b"".join(
        struct.pack(b">lll", start, end, len(content)) + content
        for start, end, content in x
    )
    return deltatext, newaddlist
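
# Each delta chunk is a 12-byte big-endian ">lll" header (start offset, end
# offset, payload length) followed by the payload, for example (sketch):
#
#   struct.pack(b'>lll', 0, 43, 0)              # delete bytes [0, 43)
#   struct.pack(b'>lll', 43, 43, 43) + line     # insert a 43-byte line at 43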


def _splittopdir(f):
    if b'/' in f:
        dir, subpath = f.split(b'/', 1)
        return dir + b'/', subpath
    else:
        return b'', f
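
# Illustrative behavior:
#
#   >>> _splittopdir(b'dir/sub/file.txt')
#   (b'dir/', b'sub/file.txt')
#   >>> _splittopdir(b'file.txt')
#   (b'', b'file.txt')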


_noop = lambda s: None


@interfaceutil.implementer(repository.imanifestdict)
class treemanifest:
    def __init__(self, nodeconstants, dir=b'', text=b''):
        self._dir = dir
        self.nodeconstants = nodeconstants
        self._node = self.nodeconstants.nullid
        self._nodelen = self.nodeconstants.nodelen
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        self._lazydirs = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:

            def readsubtree(subdir, subm):
                raise AssertionError(
                    b'treemanifest constructor only accepts flat manifests'
                )

            self.parse(text, readsubtree)
            self._dirty = True  # Mark flat manifest dirty after parsing

    def _subpath(self, path):
        return self._dir + path

    def _loadalllazy(self):
        selfdirs = self._dirs
        subpath = self._subpath
        for d, (node, readsubtree, docopy) in self._lazydirs.items():
            if docopy:
                selfdirs[d] = readsubtree(subpath(d), node).copy()
            else:
                selfdirs[d] = readsubtree(subpath(d), node)
        self._lazydirs = {}

    def _loadlazy(self, d):
        v = self._lazydirs.get(d)
        if v:
            node, readsubtree, docopy = v
            if docopy:
                self._dirs[d] = readsubtree(self._subpath(d), node).copy()
            else:
                self._dirs[d] = readsubtree(self._subpath(d), node)
            del self._lazydirs[d]

    def _loadchildrensetlazy(self, visit):
        if not visit:
            return None
        if visit == b'all' or visit == b'this':
            self._loadalllazy()
            return None

        loadlazy = self._loadlazy
        for k in visit:
            loadlazy(k + b'/')
        return visit

    def _loaddifflazy(self, t1, t2):
        """load items in t1 and t2 if they're needed for diffing.

        The current criteria are:
        - if it's not present in _lazydirs in either t1 or t2, load it in the
          other (it may already be loaded or it may not exist, doesn't matter)
        - if it's present in _lazydirs in both, compare the nodeid; if it
          differs, load it in both
        """
        toloadlazy = []
        for d, v1 in t1._lazydirs.items():
            v2 = t2._lazydirs.get(d)
            if not v2 or v2[0] != v1[0]:
                toloadlazy.append(d)
        for d, v1 in t2._lazydirs.items():
            if d not in t1._lazydirs:
                toloadlazy.append(d)

        for d in toloadlazy:
            t1._loadlazy(d)
            t2._loadlazy(d)

    def __len__(self):
        self._load()
        size = len(self._files)
        self._loadalllazy()
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def __nonzero__(self):
        # Faster than "__len__() != 0" since it avoids loading sub-manifests
        return not self._isempty()

    __bool__ = __nonzero__

    def _isempty(self):
        self._load()  # for consistency; already loaded by all callers
        # See if we can skip loading everything.
        if self._files or (
            self._dirs and any(not m._isempty() for m in self._dirs.values())
        ):
            return False
        self._loadalllazy()
        return not self._dirs or all(m._isempty() for m in self._dirs.values())

    @encoding.strmethod
    def __repr__(self):
        return (
            b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
            % (
                self._dir,
                hex(self._node),
                bool(self._loadfunc is _noop),
                self._dirty,
                id(self),
            )
        )

    def dir(self):
        """The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory."""
        return self._dir

    def node(self):
923 """This node of this instance. nullid for unsaved instances. Should
923 """This node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read or written from a revlog.
        """
        assert not self._dirty
        return self._node

    def setnode(self, node):
        self._node = node
        self._dirty = False

    def iterentries(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, b'')
            else:
                for x in n.iterentries():
                    yield x

    def items(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n
            else:
                for f, sn in n.items():
                    yield f, sn

    iteritems = items

    def iterkeys(self):
        self._load()
        self._loadalllazy()
        for p in sorted(itertools.chain(self._dirs, self._files)):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p]:
                    yield f

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __contains__(self, f):
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return False

            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f, default=None):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return b''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._lazydirs or f in self._dirs:
                return b''
            return self._flags.get(f, b'')

    def find(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, b'')

    def __delitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def __setitem__(self, f, n):
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].__setitem__(subpath, n)
        else:
            # manifest nodes are either 20 bytes or 32 bytes,
            # depending on the hash in use. Assert this as historically
            # sometimes extra bytes were added.
            assert len(n) in (20, 32)
            self._files[f] = n
        self._dirty = True
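
    # Illustrative usage (hypothetical 20-byte node): assigning through a
    # nested path creates the intermediate treemanifests on demand, so
    #
    #   >>> tm[b'dir/sub/file.txt'] = b'\xaa' * 20
    #
    # stores the node inside the b'sub/' submanifest of b'dir/'.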

    def _load(self):
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self):
        copy = treemanifest(self.nodeconstants, self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:

            def _copyfunc(s):
                self._load()
                s._lazydirs = {
                    d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
                }
                sdirs = s._dirs
                for d, v in self._dirs.items():
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)

            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.filesnotin(m2)

        files = set()

        def _filesnotin(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in t1._dirs.items():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self):
        return pathutil.dirs(self)

    def dirs(self):
        return self._alldirs

    def hasdir(self, dir):
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            self._loadlazy(topdir)
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        dirslash = dir + b'/'
        return dirslash in self._dirs or dirslash in self._lazydirs
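
    # Sketch: after the nested assignment shown above,
    #
    #   >>> tm.hasdir(b'dir/sub'), tm.hasdir(b'elsewhere')
    #   (True, False)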

    def walk(self, match):
        """Generates matching file names.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match):
        '''Recursively generates matching file names for walk().'''
        visit = match.visitchildrenset(self._dir[:-1])
        if not visit:
            return

        # yield this dir's files and walk its submanifests
        self._load()
        visit = self._loadchildrensetlazy(visit)
        for p in sorted(list(self._dirs) + list(self._files)):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                if not visit or p[:-1] in visit:
                    for f in self._dirs[p]._walk(match):
                        yield f

    def _matches(self, match):
        """recursively generate a new manifest filtered by the match argument."""
        if match.always():
            return self.copy()
        return self._matches_inner(match)

    def _matches_inner(self, match):
        if match.always():
            return self.copy()

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == b'all':
            return self.copy()
        ret = treemanifest(self.nodeconstants, self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != b'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in self._dirs.items():
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches_inner(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def fastdelta(self, base, changes):
        raise FastdeltaUnavailable()

    def diff(self, m2, match=None, clean=False):
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest(self.nodeconstants)

        def _iterativediff(t1, t2, stack):
1286 """compares two tree manifests and append new tree-manifests which
1286 """compares two tree manifests and append new tree-manifests which
1287 needs to be compared to stack"""
1287 needs to be compared to stack"""
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in t1._dirs.items():
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in t2._dirs.items():
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in t1._files.items():
                fl1 = t1._flags.get(fn, b'')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, b'')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.items():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, b'')
                    result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result
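
    # The comparison is driven by an explicit stack instead of recursion:
    # each _iterativediff() call handles one directory level and pushes the
    # child pairs that still need comparing, while subtrees with identical,
    # clean nodes are skipped without ever being loaded.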

    def unmodifiedsince(self, m2):
        return not self._dirty and not m2._dirty and self._node == m2._node

    def parse(self, text, readsubtree):
        selflazy = self._lazydirs
        for f, n, fl in _parse(self._nodelen, text):
            if fl == b't':
                f = f + b'/'
                # False below means "doesn't need to be copied" and can use the
                # cached value from readsubtree directly.
                selflazy[f] = (n, readsubtree, False)
            elif b'/' in f:
                # This is a flat manifest, so use __setitem__ and setflag rather
                # than assigning directly to _files and _flags, so we can
                # assign a path in a subdirectory, and to mark dirty (compared
                # to nullid).
                self[f] = n
                if fl:
                    self.setflag(f, fl)
            else:
                # Assigning to _files and _flags avoids marking as dirty,
                # and should be a little faster.
                self._files[f] = n
                if fl:
                    self._flags[f] = fl
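
    # A treemanifest text entry marks subdirectories with the b't' flag,
    # e.g. (hypothetical hex nodeids):
    #
    #   b'file.txt\x00' + hexnode + b'\n'
    #   b'subdir\x00' + hexnode + b't\n'
    #
    # parse() turns the b't' entries into lazily loaded submanifests.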

    def text(self):
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries())

    def dirtext(self):
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
        dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files + lazydirs))

    def read(self, gettext, readsubtree):
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            s._dirty = False

        self._loadfunc = _load_for_read

    def writesubtrees(self, m1, m2, writesubtree, match):
        self._load()  # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest(self.nodeconstants)

        def getnode(m, d):
            ld = m._lazydirs.get(d)
            if ld:
                return ld[0]
            return m._dirs.get(d, emptytree)._node

        # let's skip investigating things that `match` says we do not need.
        visit = match.visitchildrenset(self._dir[:-1])
        visit = self._loadchildrensetlazy(visit)
        if visit == b'this' or visit == b'all':
            visit = None
        for d, subm in self._dirs.items():
            if visit and d[:-1] not in visit:
                continue
            subp1 = getnode(m1, d)
            subp2 = getnode(m2, d)
            if subp1 == self.nodeconstants.nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2, match)

    def walksubtrees(self, matcher=None):
        """Returns an iterator of the subtrees of this manifest, including this
        manifest itself.

        If `matcher` is provided, it only returns subtrees that match.
        """
        if matcher and not matcher.visitdir(self._dir[:-1]):
            return
        if not matcher or matcher(self._dir[:-1]):
            yield self

        self._load()
        # OPT: use visitchildrenset to avoid loading everything.
        self._loadalllazy()
        for d, subm in self._dirs.items():
            for subtree in subm.walksubtrees(matcher=matcher):
                yield subtree


class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    File consists of entries, up to EOF:

    - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    """
1428
1428
1429 _file = b'manifestfulltextcache'
1429 _file = b'manifestfulltextcache'
1430
1430
1431 def __init__(self, max):
1431 def __init__(self, max):
1432 super(manifestfulltextcache, self).__init__(max)
1432 super(manifestfulltextcache, self).__init__(max)
1433 self._dirty = False
1433 self._dirty = False
1434 self._read = False
1434 self._read = False
1435 self._opener = None
1435 self._opener = None
1436
1436
1437 def read(self):
1437 def read(self):
1438 if self._read or self._opener is None:
1438 if self._read or self._opener is None:
1439 return
1439 return
1440
1440
1441 try:
1441 try:
1442 with self._opener(self._file) as fp:
1442 with self._opener(self._file) as fp:
1443 set = super(manifestfulltextcache, self).__setitem__
1443 set = super(manifestfulltextcache, self).__setitem__
1444 # ignore trailing data; this is a cache, so corrupted data is skipped
1444 # ignore trailing data; this is a cache, so corrupted data is skipped
1445 while True:
1445 while True:
1446 # TODO do we need to do work here for sha1 portability?
1446 # TODO do we need to do work here for sha1 portability?
1447 node = fp.read(20)
1447 node = fp.read(20)
1448 if len(node) < 20:
1448 if len(node) < 20:
1449 break
1449 break
1450 try:
1450 try:
1451 size = struct.unpack(b'>L', fp.read(4))[0]
1451 size = struct.unpack(b'>L', fp.read(4))[0]
1452 except struct.error:
1452 except struct.error:
1453 break
1453 break
1454 value = bytearray(fp.read(size))
1454 value = bytearray(fp.read(size))
1455 if len(value) != size:
1455 if len(value) != size:
1456 break
1456 break
1457 set(node, value)
1457 set(node, value)
1458 except IOError:
1458 except IOError:
1459 # the file is allowed to be missing
1459 # the file is allowed to be missing
1460 pass
1460 pass
1461
1461
1462 self._read = True
1462 self._read = True
1463 self._dirty = False
1463 self._dirty = False
1464
1464
1465 def write(self):
1465 def write(self):
1466 if not self._dirty or self._opener is None:
1466 if not self._dirty or self._opener is None:
1467 return
1467 return
1468 # rotate backwards to the first used node
1468 # rotate backwards to the first used node
1469 try:
1469 try:
1470 with self._opener(
1470 with self._opener(
1471 self._file, b'w', atomictemp=True, checkambig=True
1471 self._file, b'w', atomictemp=True, checkambig=True
1472 ) as fp:
1472 ) as fp:
1473 node = self._head.prev
1473 node = self._head.prev
1474 while True:
1474 while True:
1475 if node.key in self._cache:
1475 if node.key in self._cache:
1476 fp.write(node.key)
1476 fp.write(node.key)
1477 fp.write(struct.pack(b'>L', len(node.value)))
1477 fp.write(struct.pack(b'>L', len(node.value)))
1478 fp.write(node.value)
1478 fp.write(node.value)
1479 if node is self._head:
1479 if node is self._head:
1480 break
1480 break
1481 node = node.prev
1481 node = node.prev
1482 except IOError:
1482 except IOError:
1483 # We could not write the cache (eg: permission error)
1483 # We could not write the cache (eg: permission error)
1484 # the content can be missing.
1484 # the content can be missing.
1485 #
1485 #
1486 # We could try harder and see if we could recreate a wcache
1486 # We could try harder and see if we could recreate a wcache
1487 # directory where we could write.
1487 # directory where we could write.
1488 #
1488 #
1489 # XXX the error passes silently; having some way to log it
1489 # XXX the error passes silently; having some way to log it
1490 # (e.g. via `ui.log`) would be nice.
1490 # (e.g. via `ui.log`) would be nice.
1491 pass
1491 pass
1492
1492
1493 def __len__(self):
1493 def __len__(self):
1494 if not self._read:
1494 if not self._read:
1495 self.read()
1495 self.read()
1496 return super(manifestfulltextcache, self).__len__()
1496 return super(manifestfulltextcache, self).__len__()
1497
1497
1498 def __contains__(self, k):
1498 def __contains__(self, k):
1499 if not self._read:
1499 if not self._read:
1500 self.read()
1500 self.read()
1501 return super(manifestfulltextcache, self).__contains__(k)
1501 return super(manifestfulltextcache, self).__contains__(k)
1502
1502
1503 def __iter__(self):
1503 def __iter__(self):
1504 if not self._read:
1504 if not self._read:
1505 self.read()
1505 self.read()
1506 return super(manifestfulltextcache, self).__iter__()
1506 return super(manifestfulltextcache, self).__iter__()
1507
1507
1508 def __getitem__(self, k):
1508 def __getitem__(self, k):
1509 if not self._read:
1509 if not self._read:
1510 self.read()
1510 self.read()
1511 # the cache lru order can change on read
1511 # the cache lru order can change on read
1512 setdirty = self._cache.get(k) is not self._head
1512 setdirty = self._cache.get(k) is not self._head
1513 value = super(manifestfulltextcache, self).__getitem__(k)
1513 value = super(manifestfulltextcache, self).__getitem__(k)
1514 if setdirty:
1514 if setdirty:
1515 self._dirty = True
1515 self._dirty = True
1516 return value
1516 return value
1517
1517
1518 def __setitem__(self, k, v):
1518 def __setitem__(self, k, v):
1519 if not self._read:
1519 if not self._read:
1520 self.read()
1520 self.read()
1521 super(manifestfulltextcache, self).__setitem__(k, v)
1521 super(manifestfulltextcache, self).__setitem__(k, v)
1522 self._dirty = True
1522 self._dirty = True
1523
1523
1524 def __delitem__(self, k):
1524 def __delitem__(self, k):
1525 if not self._read:
1525 if not self._read:
1526 self.read()
1526 self.read()
1527 super(manifestfulltextcache, self).__delitem__(k)
1527 super(manifestfulltextcache, self).__delitem__(k)
1528 self._dirty = True
1528 self._dirty = True
1529
1529
1530 def get(self, k, default=None):
1530 def get(self, k, default=None):
1531 if not self._read:
1531 if not self._read:
1532 self.read()
1532 self.read()
1533 return super(manifestfulltextcache, self).get(k, default=default)
1533 return super(manifestfulltextcache, self).get(k, default=default)
1534
1534
1535 def clear(self, clear_persisted_data=False):
1535 def clear(self, clear_persisted_data=False):
1536 super(manifestfulltextcache, self).clear()
1536 super(manifestfulltextcache, self).clear()
1537 if clear_persisted_data:
1537 if clear_persisted_data:
1538 self._dirty = True
1538 self._dirty = True
1539 self.write()
1539 self.write()
1540 self._read = False
1540 self._read = False
1541
1541
1542
1542
1543 # an upper bound on what we expect from compression
1543 # an upper bound on what we expect from compression
1544 # (the real-life value seems to be "3")
1544 # (the real-life value seems to be "3")
1545 MAXCOMPRESSION = 3
1545 MAXCOMPRESSION = 3
1546
1546
1547
1547
1548 class FastdeltaUnavailable(Exception):
1548 class FastdeltaUnavailable(Exception):
1549 """Exception raised when fastdelta isn't usable on a manifest."""
1549 """Exception raised when fastdelta isn't usable on a manifest."""
1550
1550
1551
1551
1552 @interfaceutil.implementer(repository.imanifeststorage)
1552 @interfaceutil.implementer(repository.imanifeststorage)
1553 class manifestrevlog:
1553 class manifestrevlog:
1554 """A revlog that stores manifest texts. This is responsible for caching the
1554 """A revlog that stores manifest texts. This is responsible for caching the
1555 full-text manifest contents.
1555 full-text manifest contents.
1556 """
1556 """
1557
1557
1558 def __init__(
1558 def __init__(
1559 self,
1559 self,
1560 nodeconstants,
1560 nodeconstants,
1561 opener,
1561 opener,
1562 tree=b'',
1562 tree=b'',
1563 dirlogcache=None,
1563 dirlogcache=None,
1564 treemanifest=False,
1564 treemanifest=False,
1565 ):
1565 ):
1566 """Constructs a new manifest revlog
1566 """Constructs a new manifest revlog
1567
1567
1568 `indexfile` - used by extensions to have two manifests at once, like
1568 `indexfile` - used by extensions to have two manifests at once, like
1569 when transitioning between flat manifests and treemanifests.
1569 when transitioning between flat manifests and treemanifests.
1570
1570
1571 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1571 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1572 options can also be used to make this a tree manifest revlog. The opener
1572 options can also be used to make this a tree manifest revlog. The opener
1573 option takes precedence, so if it is set to True, we ignore whatever
1573 option takes precedence, so if it is set to True, we ignore whatever
1574 value is passed in to the constructor.
1574 value is passed in to the constructor.
1575 """
1575 """
1576 self.nodeconstants = nodeconstants
1576 self.nodeconstants = nodeconstants
1577 # During normal operations, we expect to deal with not more than four
1577 # During normal operations, we expect to deal with not more than four
1578 # revs at a time (such as during commit --amend). When rebasing large
1578 # revs at a time (such as during commit --amend). When rebasing large
1579 # stacks of commits, the number can go up, hence the config knob below.
1579 # stacks of commits, the number can go up, hence the config knob below.
1580 cachesize = 4
1580 cachesize = 4
1581 optiontreemanifest = False
1581 optiontreemanifest = False
1582 opts = getattr(opener, 'options', None)
1582 opts = getattr(opener, 'options', None)
1583 if opts is not None:
1583 if opts is not None:
1584 cachesize = opts.get(b'manifestcachesize', cachesize)
1584 cachesize = opts.get(b'manifestcachesize', cachesize)
1585 optiontreemanifest = opts.get(b'treemanifest', False)
1585 optiontreemanifest = opts.get(b'treemanifest', False)
1586
1586
1587 self._treeondisk = optiontreemanifest or treemanifest
1587 self._treeondisk = optiontreemanifest or treemanifest
1588
1588
1589 self._fulltextcache = manifestfulltextcache(cachesize)
1589 self._fulltextcache = manifestfulltextcache(cachesize)
1590
1590
1591 if tree:
1591 if tree:
1592 assert self._treeondisk, (tree, b'opts is %r' % opts)
1592 assert self._treeondisk, (tree, b'opts is %r' % opts)
1593
1593
1594 radix = b'00manifest'
1594 radix = b'00manifest'
1595 if tree:
1595 if tree:
1596 radix = b"meta/" + tree + radix
1596 radix = b"meta/" + tree + radix
1597
1597
1598 self.tree = tree
1598 self.tree = tree
1599
1599
1600 # The dirlogcache is kept on the root manifest log
1600 # The dirlogcache is kept on the root manifest log
1601 if tree:
1601 if tree:
1602 self._dirlogcache = dirlogcache
1602 self._dirlogcache = dirlogcache
1603 else:
1603 else:
1604 self._dirlogcache = {b'': self}
1604 self._dirlogcache = {b'': self}
1605
1605
1606 self._revlog = revlog.revlog(
1606 self._revlog = revlog.revlog(
1607 opener,
1607 opener,
1608 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1608 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1609 radix=radix,
1609 radix=radix,
1610 # only root indexfile is cached
1610 # only root indexfile is cached
1611 checkambig=not bool(tree),
1611 checkambig=not bool(tree),
1612 mmaplargeindex=True,
1612 mmaplargeindex=True,
1613 upperboundcomp=MAXCOMPRESSION,
1613 upperboundcomp=MAXCOMPRESSION,
1614 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1614 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1615 )
1615 )
1616
1616
1617 self.index = self._revlog.index
1617 self.index = self._revlog.index
1618 self._generaldelta = self._revlog._generaldelta
1618 self._generaldelta = self._revlog._generaldelta
1619
1619
1620 def get_revlog(self):
1621 """return an actual revlog instance if any
1622
1623 This exists because a lot of code leverages the fact that the
1624 underlying storage is a revlog for optimization, so giving a simple
1625 way to access the revlog instance helps such code.
1626 """
1627 return self._revlog
1628
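
# --- illustrative aside (not part of the diff) ---
# Hypothetical caller-side sketch: code that wants revlog-specific
# optimizations can probe for get_revlog() instead of reaching into
# the private _revlog attribute. The helper name is made up.
def revlog_or_none(storage):
    getter = getattr(storage, 'get_revlog', None)
    return getter() if getter is not None else None
# --- end aside ---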
1620 def _setupmanifestcachehooks(self, repo):
1629 def _setupmanifestcachehooks(self, repo):
1621 """Persist the manifestfulltextcache on lock release"""
1630 """Persist the manifestfulltextcache on lock release"""
1622 if not util.safehasattr(repo, '_wlockref'):
1631 if not util.safehasattr(repo, '_wlockref'):
1623 return
1632 return
1624
1633
1625 self._fulltextcache._opener = repo.wcachevfs
1634 self._fulltextcache._opener = repo.wcachevfs
1626 if repo._currentlock(repo._wlockref) is None:
1635 if repo._currentlock(repo._wlockref) is None:
1627 return
1636 return
1628
1637
1629 reporef = weakref.ref(repo)
1638 reporef = weakref.ref(repo)
1630 manifestrevlogref = weakref.ref(self)
1639 manifestrevlogref = weakref.ref(self)
1631
1640
1632 def persistmanifestcache(success):
1641 def persistmanifestcache(success):
1633 # Repo is in an unknown state, do not persist.
1642 # Repo is in an unknown state, do not persist.
1634 if not success:
1643 if not success:
1635 return
1644 return
1636
1645
1637 repo = reporef()
1646 repo = reporef()
1638 self = manifestrevlogref()
1647 self = manifestrevlogref()
1639 if repo is None or self is None:
1648 if repo is None or self is None:
1640 return
1649 return
1641 if repo.manifestlog.getstorage(b'') is not self:
1650 if repo.manifestlog.getstorage(b'') is not self:
1642 # there's a different manifest in play now, abort
1651 # there's a different manifest in play now, abort
1643 return
1652 return
1644 self._fulltextcache.write()
1653 self._fulltextcache.write()
1645
1654
1646 repo._afterlock(persistmanifestcache)
1655 repo._afterlock(persistmanifestcache)
1647
1656
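
# --- illustrative aside (not part of the diff) ---
# The weak-reference pattern used above, in isolation: the after-lock
# callback holds only weak references, so it cannot keep the repo or
# the manifest revlog alive, and it silently becomes a no-op once the
# target has been garbage-collected. The class and names are made up.
import weakref

class _Target:
    def flush(self):
        return 'flushed'

target = _Target()
targetref = weakref.ref(target)

def callback():
    obj = targetref()
    if obj is None:  # target already collected: nothing to do
        return None
    return obj.flush()

assert callback() == 'flushed'
del target  # drop the only strong reference (CPython frees it here)
assert callback() is None
# --- end aside ---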
1648 @property
1657 @property
1649 def fulltextcache(self):
1658 def fulltextcache(self):
1650 return self._fulltextcache
1659 return self._fulltextcache
1651
1660
1652 def clearcaches(self, clear_persisted_data=False):
1661 def clearcaches(self, clear_persisted_data=False):
1653 self._revlog.clearcaches()
1662 self._revlog.clearcaches()
1654 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1663 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1655 self._dirlogcache = {self.tree: self}
1664 self._dirlogcache = {self.tree: self}
1656
1665
1657 def dirlog(self, d):
1666 def dirlog(self, d):
1658 if d:
1667 if d:
1659 assert self._treeondisk
1668 assert self._treeondisk
1660 if d not in self._dirlogcache:
1669 if d not in self._dirlogcache:
1661 mfrevlog = manifestrevlog(
1670 mfrevlog = manifestrevlog(
1662 self.nodeconstants,
1671 self.nodeconstants,
1663 self.opener,
1672 self.opener,
1664 d,
1673 d,
1665 self._dirlogcache,
1674 self._dirlogcache,
1666 treemanifest=self._treeondisk,
1675 treemanifest=self._treeondisk,
1667 )
1676 )
1668 self._dirlogcache[d] = mfrevlog
1677 self._dirlogcache[d] = mfrevlog
1669 return self._dirlogcache[d]
1678 return self._dirlogcache[d]
1670
1679
1671 def add(
1680 def add(
1672 self,
1681 self,
1673 m,
1682 m,
1674 transaction,
1683 transaction,
1675 link,
1684 link,
1676 p1,
1685 p1,
1677 p2,
1686 p2,
1678 added,
1687 added,
1679 removed,
1688 removed,
1680 readtree=None,
1689 readtree=None,
1681 match=None,
1690 match=None,
1682 ):
1691 ):
1683 """add some manifest entry in to the manifest log
1692 """add some manifest entry in to the manifest log
1684
1693
1685 input:
1694 input:
1686
1695
1687 m: the manifest dict we want to store
1696 m: the manifest dict we want to store
1688 transaction: the open transaction
1697 transaction: the open transaction
1689 p1: manifest-node of p1
1698 p1: manifest-node of p1
1690 p2: manifest-node of p2
1699 p2: manifest-node of p2
1691 added: files added/changed compared to the parent
1700 added: files added/changed compared to the parent
1692 removed: files removed compared to the parent
1701 removed: files removed compared to the parent
1693
1702
1694 tree manifest input:
1703 tree manifest input:
1695
1704
1696 readtree: a function to read a subtree
1705 readtree: a function to read a subtree
1697 match: a filematcher for the subpart of the tree manifest
1706 match: a filematcher for the subpart of the tree manifest
1698 """
1707 """
1699 try:
1708 try:
1700 if p1 not in self.fulltextcache:
1709 if p1 not in self.fulltextcache:
1701 raise FastdeltaUnavailable()
1710 raise FastdeltaUnavailable()
1702 # If our first parent is in the manifest cache, we can
1711 # If our first parent is in the manifest cache, we can
1703 # compute a delta here using properties we know about the
1712 # compute a delta here using properties we know about the
1704 # manifest up-front, which may save time later for the
1713 # manifest up-front, which may save time later for the
1705 # revlog layer.
1714 # revlog layer.
1706
1715
1707 _checkforbidden(added)
1716 _checkforbidden(added)
1708 # combine the changed lists into one sorted iterator
1717 # combine the changed lists into one sorted iterator
1709 work = heapq.merge(
1718 work = heapq.merge(
1710 [(x, False) for x in sorted(added)],
1719 [(x, False) for x in sorted(added)],
1711 [(x, True) for x in sorted(removed)],
1720 [(x, True) for x in sorted(removed)],
1712 )
1721 )
1713
1722
1714 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1723 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1715 cachedelta = self._revlog.rev(p1), deltatext
1724 cachedelta = self._revlog.rev(p1), deltatext
1716 text = util.buffer(arraytext)
1725 text = util.buffer(arraytext)
1717 rev = self._revlog.addrevision(
1726 rev = self._revlog.addrevision(
1718 text, transaction, link, p1, p2, cachedelta
1727 text, transaction, link, p1, p2, cachedelta
1719 )
1728 )
1720 n = self._revlog.node(rev)
1729 n = self._revlog.node(rev)
1721 except FastdeltaUnavailable:
1730 except FastdeltaUnavailable:
1722 # The first parent manifest isn't already loaded or the
1731 # The first parent manifest isn't already loaded or the
1723 # manifest implementation doesn't support fastdelta, so
1732 # manifest implementation doesn't support fastdelta, so
1724 # we'll just encode a fulltext of the manifest and pass
1733 # we'll just encode a fulltext of the manifest and pass
1725 # that through to the revlog layer, and let it handle the
1734 # that through to the revlog layer, and let it handle the
1726 # delta process.
1735 # delta process.
1727 if self._treeondisk:
1736 if self._treeondisk:
1728 assert readtree, b"readtree must be set for treemanifest writes"
1737 assert readtree, b"readtree must be set for treemanifest writes"
1729 assert match, b"match must be specified for treemanifest writes"
1738 assert match, b"match must be specified for treemanifest writes"
1730 m1 = readtree(self.tree, p1)
1739 m1 = readtree(self.tree, p1)
1731 m2 = readtree(self.tree, p2)
1740 m2 = readtree(self.tree, p2)
1732 n = self._addtree(
1741 n = self._addtree(
1733 m, transaction, link, m1, m2, readtree, match=match
1742 m, transaction, link, m1, m2, readtree, match=match
1734 )
1743 )
1735 arraytext = None
1744 arraytext = None
1736 else:
1745 else:
1737 text = m.text()
1746 text = m.text()
1738 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1747 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1739 n = self._revlog.node(rev)
1748 n = self._revlog.node(rev)
1740 arraytext = bytearray(text)
1749 arraytext = bytearray(text)
1741
1750
1742 if arraytext is not None:
1751 if arraytext is not None:
1743 self.fulltextcache[n] = arraytext
1752 self.fulltextcache[n] = arraytext
1744
1753
1745 return n
1754 return n
1746
1755
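
# --- illustrative aside (not part of the diff) ---
# The "work" iterator built in add() above, reduced to a toy:
# heapq.merge interleaves two already-sorted streams into one stream
# of (path, is_removed) pairs sorted by path, which fastdelta can then
# consume in a single ordered pass. The sample paths are made up.
import heapq

added = [b'src/new.py', b'a.txt']
removed = [b'old.c']
work = heapq.merge(
    [(x, False) for x in sorted(added)],
    [(x, True) for x in sorted(removed)],
)
assert list(work) == [
    (b'a.txt', False),
    (b'old.c', True),
    (b'src/new.py', False),
]
# --- end aside ---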
1747 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1756 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1748 # If the manifest is unchanged compared to one parent,
1757 # If the manifest is unchanged compared to one parent,
1749 # don't write a new revision
1758 # don't write a new revision
1750 if self.tree != b'' and (
1759 if self.tree != b'' and (
1751 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1760 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1752 ):
1761 ):
1753 return m.node()
1762 return m.node()
1754
1763
1755 def writesubtree(subm, subp1, subp2, match):
1764 def writesubtree(subm, subp1, subp2, match):
1756 sublog = self.dirlog(subm.dir())
1765 sublog = self.dirlog(subm.dir())
1757 sublog.add(
1766 sublog.add(
1758 subm,
1767 subm,
1759 transaction,
1768 transaction,
1760 link,
1769 link,
1761 subp1,
1770 subp1,
1762 subp2,
1771 subp2,
1763 None,
1772 None,
1764 None,
1773 None,
1765 readtree=readtree,
1774 readtree=readtree,
1766 match=match,
1775 match=match,
1767 )
1776 )
1768
1777
1769 m.writesubtrees(m1, m2, writesubtree, match)
1778 m.writesubtrees(m1, m2, writesubtree, match)
1770 text = m.dirtext()
1779 text = m.dirtext()
1771 n = None
1780 n = None
1772 if self.tree != b'':
1781 if self.tree != b'':
1773 # Double-check whether contents are unchanged to one parent
1782 # Double-check whether contents are unchanged to one parent
1774 if text == m1.dirtext():
1783 if text == m1.dirtext():
1775 n = m1.node()
1784 n = m1.node()
1776 elif text == m2.dirtext():
1785 elif text == m2.dirtext():
1777 n = m2.node()
1786 n = m2.node()
1778
1787
1779 if not n:
1788 if not n:
1780 rev = self._revlog.addrevision(
1789 rev = self._revlog.addrevision(
1781 text, transaction, link, m1.node(), m2.node()
1790 text, transaction, link, m1.node(), m2.node()
1782 )
1791 )
1783 n = self._revlog.node(rev)
1792 n = self._revlog.node(rev)
1784
1793
1785 # Save nodeid so parent manifest can calculate its nodeid
1794 # Save nodeid so parent manifest can calculate its nodeid
1786 m.setnode(n)
1795 m.setnode(n)
1787 return n
1796 return n
1788
1797
1789 def __len__(self):
1798 def __len__(self):
1790 return len(self._revlog)
1799 return len(self._revlog)
1791
1800
1792 def __iter__(self):
1801 def __iter__(self):
1793 return self._revlog.__iter__()
1802 return self._revlog.__iter__()
1794
1803
1795 def rev(self, node):
1804 def rev(self, node):
1796 return self._revlog.rev(node)
1805 return self._revlog.rev(node)
1797
1806
1798 def node(self, rev):
1807 def node(self, rev):
1799 return self._revlog.node(rev)
1808 return self._revlog.node(rev)
1800
1809
1801 def lookup(self, value):
1810 def lookup(self, value):
1802 return self._revlog.lookup(value)
1811 return self._revlog.lookup(value)
1803
1812
1804 def parentrevs(self, rev):
1813 def parentrevs(self, rev):
1805 return self._revlog.parentrevs(rev)
1814 return self._revlog.parentrevs(rev)
1806
1815
1807 def parents(self, node):
1816 def parents(self, node):
1808 return self._revlog.parents(node)
1817 return self._revlog.parents(node)
1809
1818
1810 def linkrev(self, rev):
1819 def linkrev(self, rev):
1811 return self._revlog.linkrev(rev)
1820 return self._revlog.linkrev(rev)
1812
1821
1813 def checksize(self):
1822 def checksize(self):
1814 return self._revlog.checksize()
1823 return self._revlog.checksize()
1815
1824
1816 def revision(self, node, _df=None):
1825 def revision(self, node, _df=None):
1817 return self._revlog.revision(node, _df=_df)
1826 return self._revlog.revision(node, _df=_df)
1818
1827
1819 def rawdata(self, node, _df=None):
1828 def rawdata(self, node, _df=None):
1820 return self._revlog.rawdata(node, _df=_df)
1829 return self._revlog.rawdata(node, _df=_df)
1821
1830
1822 def revdiff(self, rev1, rev2):
1831 def revdiff(self, rev1, rev2):
1823 return self._revlog.revdiff(rev1, rev2)
1832 return self._revlog.revdiff(rev1, rev2)
1824
1833
1825 def cmp(self, node, text):
1834 def cmp(self, node, text):
1826 return self._revlog.cmp(node, text)
1835 return self._revlog.cmp(node, text)
1827
1836
1828 def deltaparent(self, rev):
1837 def deltaparent(self, rev):
1829 return self._revlog.deltaparent(rev)
1838 return self._revlog.deltaparent(rev)
1830
1839
1831 def emitrevisions(
1840 def emitrevisions(
1832 self,
1841 self,
1833 nodes,
1842 nodes,
1834 nodesorder=None,
1843 nodesorder=None,
1835 revisiondata=False,
1844 revisiondata=False,
1836 assumehaveparentrevisions=False,
1845 assumehaveparentrevisions=False,
1837 deltamode=repository.CG_DELTAMODE_STD,
1846 deltamode=repository.CG_DELTAMODE_STD,
1838 sidedata_helpers=None,
1847 sidedata_helpers=None,
1839 debug_info=None,
1848 debug_info=None,
1840 ):
1849 ):
1841 return self._revlog.emitrevisions(
1850 return self._revlog.emitrevisions(
1842 nodes,
1851 nodes,
1843 nodesorder=nodesorder,
1852 nodesorder=nodesorder,
1844 revisiondata=revisiondata,
1853 revisiondata=revisiondata,
1845 assumehaveparentrevisions=assumehaveparentrevisions,
1854 assumehaveparentrevisions=assumehaveparentrevisions,
1846 deltamode=deltamode,
1855 deltamode=deltamode,
1847 sidedata_helpers=sidedata_helpers,
1856 sidedata_helpers=sidedata_helpers,
1848 debug_info=debug_info,
1857 debug_info=debug_info,
1849 )
1858 )
1850
1859
1851 def addgroup(
1860 def addgroup(
1852 self,
1861 self,
1853 deltas,
1862 deltas,
1854 linkmapper,
1863 linkmapper,
1855 transaction,
1864 transaction,
1856 alwayscache=False,
1865 alwayscache=False,
1857 addrevisioncb=None,
1866 addrevisioncb=None,
1858 duplicaterevisioncb=None,
1867 duplicaterevisioncb=None,
1859 debug_info=None,
1868 debug_info=None,
1860 delta_base_reuse_policy=None,
1869 delta_base_reuse_policy=None,
1861 ):
1870 ):
1862 return self._revlog.addgroup(
1871 return self._revlog.addgroup(
1863 deltas,
1872 deltas,
1864 linkmapper,
1873 linkmapper,
1865 transaction,
1874 transaction,
1866 alwayscache=alwayscache,
1875 alwayscache=alwayscache,
1867 addrevisioncb=addrevisioncb,
1876 addrevisioncb=addrevisioncb,
1868 duplicaterevisioncb=duplicaterevisioncb,
1877 duplicaterevisioncb=duplicaterevisioncb,
1869 debug_info=debug_info,
1878 debug_info=debug_info,
1870 delta_base_reuse_policy=delta_base_reuse_policy,
1879 delta_base_reuse_policy=delta_base_reuse_policy,
1871 )
1880 )
1872
1881
1873 def rawsize(self, rev):
1882 def rawsize(self, rev):
1874 return self._revlog.rawsize(rev)
1883 return self._revlog.rawsize(rev)
1875
1884
1876 def getstrippoint(self, minlink):
1885 def getstrippoint(self, minlink):
1877 return self._revlog.getstrippoint(minlink)
1886 return self._revlog.getstrippoint(minlink)
1878
1887
1879 def strip(self, minlink, transaction):
1888 def strip(self, minlink, transaction):
1880 return self._revlog.strip(minlink, transaction)
1889 return self._revlog.strip(minlink, transaction)
1881
1890
1882 def files(self):
1891 def files(self):
1883 return self._revlog.files()
1892 return self._revlog.files()
1884
1893
1885 def clone(self, tr, destrevlog, **kwargs):
1894 def clone(self, tr, destrevlog, **kwargs):
1886 if not isinstance(destrevlog, manifestrevlog):
1895 if not isinstance(destrevlog, manifestrevlog):
1887 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1896 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1888
1897
1889 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1898 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1890
1899
1891 def storageinfo(
1900 def storageinfo(
1892 self,
1901 self,
1893 exclusivefiles=False,
1902 exclusivefiles=False,
1894 sharedfiles=False,
1903 sharedfiles=False,
1895 revisionscount=False,
1904 revisionscount=False,
1896 trackedsize=False,
1905 trackedsize=False,
1897 storedsize=False,
1906 storedsize=False,
1898 ):
1907 ):
1899 return self._revlog.storageinfo(
1908 return self._revlog.storageinfo(
1900 exclusivefiles=exclusivefiles,
1909 exclusivefiles=exclusivefiles,
1901 sharedfiles=sharedfiles,
1910 sharedfiles=sharedfiles,
1902 revisionscount=revisionscount,
1911 revisionscount=revisionscount,
1903 trackedsize=trackedsize,
1912 trackedsize=trackedsize,
1904 storedsize=storedsize,
1913 storedsize=storedsize,
1905 )
1914 )
1906
1915
1907 @property
1916 @property
1908 def opener(self):
1917 def opener(self):
1909 return self._revlog.opener
1918 return self._revlog.opener
1910
1919
1911 @opener.setter
1920 @opener.setter
1912 def opener(self, value):
1921 def opener(self, value):
1913 self._revlog.opener = value
1922 self._revlog.opener = value
1914
1923
1915
1924
1916 @interfaceutil.implementer(repository.imanifestlog)
1925 @interfaceutil.implementer(repository.imanifestlog)
1917 class manifestlog:
1926 class manifestlog:
1918 """A collection class representing the collection of manifest snapshots
1927 """A collection class representing the collection of manifest snapshots
1919 referenced by commits in the repository.
1928 referenced by commits in the repository.
1920
1929
1921 In this situation, 'manifest' refers to the abstract concept of a snapshot
1930 In this situation, 'manifest' refers to the abstract concept of a snapshot
1922 of the list of files in the given commit. Consumers of the output of this
1931 of the list of files in the given commit. Consumers of the output of this
1923 class do not care about the implementation details of the actual manifests
1932 class do not care about the implementation details of the actual manifests
1924 they receive (i.e. tree or flat or lazily loaded, etc)."""
1933 they receive (i.e. tree or flat or lazily loaded, etc)."""
1925
1934
1926 def __init__(self, opener, repo, rootstore, narrowmatch):
1935 def __init__(self, opener, repo, rootstore, narrowmatch):
1927 self.nodeconstants = repo.nodeconstants
1936 self.nodeconstants = repo.nodeconstants
1928 usetreemanifest = False
1937 usetreemanifest = False
1929 cachesize = 4
1938 cachesize = 4
1930
1939
1931 opts = getattr(opener, 'options', None)
1940 opts = getattr(opener, 'options', None)
1932 if opts is not None:
1941 if opts is not None:
1933 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1942 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1934 cachesize = opts.get(b'manifestcachesize', cachesize)
1943 cachesize = opts.get(b'manifestcachesize', cachesize)
1935
1944
1936 self._treemanifests = usetreemanifest
1945 self._treemanifests = usetreemanifest
1937
1946
1938 self._rootstore = rootstore
1947 self._rootstore = rootstore
1939 self._rootstore._setupmanifestcachehooks(repo)
1948 self._rootstore._setupmanifestcachehooks(repo)
1940 self._narrowmatch = narrowmatch
1949 self._narrowmatch = narrowmatch
1941
1950
1942 # A cache of the manifestctx or treemanifestctx for each directory
1951 # A cache of the manifestctx or treemanifestctx for each directory
1943 self._dirmancache = {}
1952 self._dirmancache = {}
1944 self._dirmancache[b''] = util.lrucachedict(cachesize)
1953 self._dirmancache[b''] = util.lrucachedict(cachesize)
1945
1954
1946 self._cachesize = cachesize
1955 self._cachesize = cachesize
1947
1956
1948 def __getitem__(self, node):
1957 def __getitem__(self, node):
1949 """Retrieves the manifest instance for the given node. Throws a
1958 """Retrieves the manifest instance for the given node. Throws a
1950 LookupError if not found.
1959 LookupError if not found.
1951 """
1960 """
1952 return self.get(b'', node)
1961 return self.get(b'', node)
1953
1962
1954 def get(self, tree, node, verify=True):
1963 def get(self, tree, node, verify=True):
1955 """Retrieves the manifest instance for the given node. Throws a
1964 """Retrieves the manifest instance for the given node. Throws a
1956 LookupError if not found.
1965 LookupError if not found.
1957
1966
1958 `verify` - if True an exception will be thrown if the node is not in
1967 `verify` - if True an exception will be thrown if the node is not in
1959 the revlog
1968 the revlog
1960 """
1969 """
1961 if node in self._dirmancache.get(tree, ()):
1970 if node in self._dirmancache.get(tree, ()):
1962 return self._dirmancache[tree][node]
1971 return self._dirmancache[tree][node]
1963
1972
1964 if not self._narrowmatch.always():
1973 if not self._narrowmatch.always():
1965 if not self._narrowmatch.visitdir(tree[:-1]):
1974 if not self._narrowmatch.visitdir(tree[:-1]):
1966 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1975 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1967 if tree:
1976 if tree:
1968 if self._rootstore._treeondisk:
1977 if self._rootstore._treeondisk:
1969 if verify:
1978 if verify:
1970 # Side-effect is LookupError is raised if node doesn't
1979 # Side-effect is LookupError is raised if node doesn't
1971 # exist.
1980 # exist.
1972 self.getstorage(tree).rev(node)
1981 self.getstorage(tree).rev(node)
1973
1982
1974 m = treemanifestctx(self, tree, node)
1983 m = treemanifestctx(self, tree, node)
1975 else:
1984 else:
1976 raise error.Abort(
1985 raise error.Abort(
1977 _(
1986 _(
1978 b"cannot ask for manifest directory '%s' in a flat "
1987 b"cannot ask for manifest directory '%s' in a flat "
1979 b"manifest"
1988 b"manifest"
1980 )
1989 )
1981 % tree
1990 % tree
1982 )
1991 )
1983 else:
1992 else:
1984 if verify:
1993 if verify:
1985 # Side-effect is LookupError is raised if node doesn't exist.
1994 # Side-effect is LookupError is raised if node doesn't exist.
1986 self._rootstore.rev(node)
1995 self._rootstore.rev(node)
1987
1996
1988 if self._treemanifests:
1997 if self._treemanifests:
1989 m = treemanifestctx(self, b'', node)
1998 m = treemanifestctx(self, b'', node)
1990 else:
1999 else:
1991 m = manifestctx(self, node)
2000 m = manifestctx(self, node)
1992
2001
1993 if node != self.nodeconstants.nullid:
2002 if node != self.nodeconstants.nullid:
1994 mancache = self._dirmancache.get(tree)
2003 mancache = self._dirmancache.get(tree)
1995 if not mancache:
2004 if not mancache:
1996 mancache = util.lrucachedict(self._cachesize)
2005 mancache = util.lrucachedict(self._cachesize)
1997 self._dirmancache[tree] = mancache
2006 self._dirmancache[tree] = mancache
1998 mancache[node] = m
2007 mancache[node] = m
1999 return m
2008 return m
2000
2009
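
# --- illustrative aside (not part of the diff) ---
# The per-directory cache pattern used by get() above, reduced to
# plain dicts (the real code uses util.lrucachedict): one cache per
# tree, created lazily on first miss. Names and values are made up.
dirmancache = {}

def remember(tree, node, ctx):
    mancache = dirmancache.get(tree)
    if not mancache:
        mancache = {}  # stand-in for util.lrucachedict(cachesize)
        dirmancache[tree] = mancache
    mancache[node] = ctx

remember(b'', b'node1', 'ctx1')
remember(b'dir/', b'node2', 'ctx2')
assert dirmancache[b''][b'node1'] == 'ctx1'
# --- end aside ---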
2001 def getstorage(self, tree):
2010 def getstorage(self, tree):
2002 return self._rootstore.dirlog(tree)
2011 return self._rootstore.dirlog(tree)
2003
2012
2004 def clearcaches(self, clear_persisted_data=False):
2013 def clearcaches(self, clear_persisted_data=False):
2005 self._dirmancache.clear()
2014 self._dirmancache.clear()
2006 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2015 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2007
2016
2008 def rev(self, node):
2017 def rev(self, node):
2009 return self._rootstore.rev(node)
2018 return self._rootstore.rev(node)
2010
2019
2011 def update_caches(self, transaction):
2020 def update_caches(self, transaction):
2012 return self._rootstore._revlog.update_caches(transaction=transaction)
2021 return self._rootstore._revlog.update_caches(transaction=transaction)
2013
2022
2014
2023
2015 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2024 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2016 class memmanifestctx:
2025 class memmanifestctx:
2017 def __init__(self, manifestlog):
2026 def __init__(self, manifestlog):
2018 self._manifestlog = manifestlog
2027 self._manifestlog = manifestlog
2019 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2028 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2020
2029
2021 def _storage(self):
2030 def _storage(self):
2022 return self._manifestlog.getstorage(b'')
2031 return self._manifestlog.getstorage(b'')
2023
2032
2024 def copy(self):
2033 def copy(self):
2025 memmf = memmanifestctx(self._manifestlog)
2034 memmf = memmanifestctx(self._manifestlog)
2026 memmf._manifestdict = self.read().copy()
2035 memmf._manifestdict = self.read().copy()
2027 return memmf
2036 return memmf
2028
2037
2029 def read(self):
2038 def read(self):
2030 return self._manifestdict
2039 return self._manifestdict
2031
2040
2032 def write(self, transaction, link, p1, p2, added, removed, match=None):
2041 def write(self, transaction, link, p1, p2, added, removed, match=None):
2033 return self._storage().add(
2042 return self._storage().add(
2034 self._manifestdict,
2043 self._manifestdict,
2035 transaction,
2044 transaction,
2036 link,
2045 link,
2037 p1,
2046 p1,
2038 p2,
2047 p2,
2039 added,
2048 added,
2040 removed,
2049 removed,
2041 match=match,
2050 match=match,
2042 )
2051 )
2043
2052
2044
2053
2045 @interfaceutil.implementer(repository.imanifestrevisionstored)
2054 @interfaceutil.implementer(repository.imanifestrevisionstored)
2046 class manifestctx:
2055 class manifestctx:
2047 """A class representing a single revision of a manifest, including its
2056 """A class representing a single revision of a manifest, including its
2048 contents, its parent revs, and its linkrev.
2057 contents, its parent revs, and its linkrev.
2049 """
2058 """
2050
2059
2051 def __init__(self, manifestlog, node):
2060 def __init__(self, manifestlog, node):
2052 self._manifestlog = manifestlog
2061 self._manifestlog = manifestlog
2053 self._data = None
2062 self._data = None
2054
2063
2055 self._node = node
2064 self._node = node
2056
2065
2057 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2066 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2058 # but let's add it later when something needs it and we can load it
2067 # but let's add it later when something needs it and we can load it
2059 # lazily.
2068 # lazily.
2060 # self.p1, self.p2 = store.parents(node)
2069 # self.p1, self.p2 = store.parents(node)
2061 # rev = store.rev(node)
2070 # rev = store.rev(node)
2062 # self.linkrev = store.linkrev(rev)
2071 # self.linkrev = store.linkrev(rev)
2063
2072
2064 def _storage(self):
2073 def _storage(self):
2065 return self._manifestlog.getstorage(b'')
2074 return self._manifestlog.getstorage(b'')
2066
2075
2067 def node(self):
2076 def node(self):
2068 return self._node
2077 return self._node
2069
2078
2070 def copy(self):
2079 def copy(self):
2071 memmf = memmanifestctx(self._manifestlog)
2080 memmf = memmanifestctx(self._manifestlog)
2072 memmf._manifestdict = self.read().copy()
2081 memmf._manifestdict = self.read().copy()
2073 return memmf
2082 return memmf
2074
2083
2075 @propertycache
2084 @propertycache
2076 def parents(self):
2085 def parents(self):
2077 return self._storage().parents(self._node)
2086 return self._storage().parents(self._node)
2078
2087
2079 def read(self):
2088 def read(self):
2080 if self._data is None:
2089 if self._data is None:
2081 nc = self._manifestlog.nodeconstants
2090 nc = self._manifestlog.nodeconstants
2082 if self._node == nc.nullid:
2091 if self._node == nc.nullid:
2083 self._data = manifestdict(nc.nodelen)
2092 self._data = manifestdict(nc.nodelen)
2084 else:
2093 else:
2085 store = self._storage()
2094 store = self._storage()
2086 if self._node in store.fulltextcache:
2095 if self._node in store.fulltextcache:
2087 text = pycompat.bytestr(store.fulltextcache[self._node])
2096 text = pycompat.bytestr(store.fulltextcache[self._node])
2088 else:
2097 else:
2089 text = store.revision(self._node)
2098 text = store.revision(self._node)
2090 arraytext = bytearray(text)
2099 arraytext = bytearray(text)
2091 store.fulltextcache[self._node] = arraytext
2100 store.fulltextcache[self._node] = arraytext
2092 self._data = manifestdict(nc.nodelen, text)
2101 self._data = manifestdict(nc.nodelen, text)
2093 return self._data
2102 return self._data
2094
2103
2095 def readfast(self, shallow=False):
2104 def readfast(self, shallow=False):
2096 """Calls either readdelta or read, based on which would be less work.
2105 """Calls either readdelta or read, based on which would be less work.
2097 readdelta is called if the delta is against the p1, and therefore can be
2106 readdelta is called if the delta is against the p1, and therefore can be
2098 read quickly.
2107 read quickly.
2099
2108
2100 If `shallow` is True, nothing changes since this is a flat manifest.
2109 If `shallow` is True, nothing changes since this is a flat manifest.
2101 """
2110 """
2102 store = self._storage()
2111 store = self._storage()
2103 r = store.rev(self._node)
2112 r = store.rev(self._node)
2104 deltaparent = store.deltaparent(r)
2113 deltaparent = store.deltaparent(r)
2105 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2114 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2106 return self.readdelta()
2115 return self.readdelta()
2107 return self.read()
2116 return self.read()
2108
2117
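
# --- illustrative aside (not part of the diff) ---
# The readfast() decision rule in isolation: the stored delta is cheap
# to use only when the delta parent is one of the revision's actual
# parents (and not the null revision). Revision numbers are made up.
nullrev = -1

def delta_is_against_a_parent(deltaparent, parentrevs):
    return deltaparent != nullrev and deltaparent in parentrevs

assert delta_is_against_a_parent(4, (4, -1))       # delta vs p1
assert not delta_is_against_a_parent(-1, (4, -1))  # full snapshot
assert not delta_is_against_a_parent(2, (4, 5))    # delta vs non-parent
# --- end aside ---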
2109 def readdelta(self, shallow=False):
2118 def readdelta(self, shallow=False):
2110 """Returns a manifest containing just the entries that are present
2119 """Returns a manifest containing just the entries that are present
2111 in this manifest, but not in its p1 manifest. This is efficient to read
2120 in this manifest, but not in its p1 manifest. This is efficient to read
2112 if the revlog delta is already p1.
2121 if the revlog delta is already p1.
2113
2122
2114 Changing the value of `shallow` has no effect on flat manifests.
2123 Changing the value of `shallow` has no effect on flat manifests.
2115 """
2124 """
2116 store = self._storage()
2125 store = self._storage()
2117 r = store.rev(self._node)
2126 r = store.rev(self._node)
2118 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2127 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2119 return manifestdict(store.nodeconstants.nodelen, d)
2128 return manifestdict(store.nodeconstants.nodelen, d)
2120
2129
2121 def find(self, key):
2130 def find(self, key):
2122 return self.read().find(key)
2131 return self.read().find(key)
2123
2132
2124
2133
2125 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2134 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2126 class memtreemanifestctx:
2135 class memtreemanifestctx:
2127 def __init__(self, manifestlog, dir=b''):
2136 def __init__(self, manifestlog, dir=b''):
2128 self._manifestlog = manifestlog
2137 self._manifestlog = manifestlog
2129 self._dir = dir
2138 self._dir = dir
2130 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2139 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2131
2140
2132 def _storage(self):
2141 def _storage(self):
2133 return self._manifestlog.getstorage(b'')
2142 return self._manifestlog.getstorage(b'')
2134
2143
2135 def copy(self):
2144 def copy(self):
2136 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2145 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2137 memmf._treemanifest = self._treemanifest.copy()
2146 memmf._treemanifest = self._treemanifest.copy()
2138 return memmf
2147 return memmf
2139
2148
2140 def read(self):
2149 def read(self):
2141 return self._treemanifest
2150 return self._treemanifest
2142
2151
2143 def write(self, transaction, link, p1, p2, added, removed, match=None):
2152 def write(self, transaction, link, p1, p2, added, removed, match=None):
2144 def readtree(dir, node):
2153 def readtree(dir, node):
2145 return self._manifestlog.get(dir, node).read()
2154 return self._manifestlog.get(dir, node).read()
2146
2155
2147 return self._storage().add(
2156 return self._storage().add(
2148 self._treemanifest,
2157 self._treemanifest,
2149 transaction,
2158 transaction,
2150 link,
2159 link,
2151 p1,
2160 p1,
2152 p2,
2161 p2,
2153 added,
2162 added,
2154 removed,
2163 removed,
2155 readtree=readtree,
2164 readtree=readtree,
2156 match=match,
2165 match=match,
2157 )
2166 )
2158
2167
2159
2168
2160 @interfaceutil.implementer(repository.imanifestrevisionstored)
2169 @interfaceutil.implementer(repository.imanifestrevisionstored)
2161 class treemanifestctx:
2170 class treemanifestctx:
2162 def __init__(self, manifestlog, dir, node):
2171 def __init__(self, manifestlog, dir, node):
2163 self._manifestlog = manifestlog
2172 self._manifestlog = manifestlog
2164 self._dir = dir
2173 self._dir = dir
2165 self._data = None
2174 self._data = None
2166
2175
2167 self._node = node
2176 self._node = node
2168
2177
2169 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2178 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2170 # we can instantiate treemanifestctx objects for directories we don't
2179 # we can instantiate treemanifestctx objects for directories we don't
2171 # have on disk.
2180 # have on disk.
2172 # self.p1, self.p2 = store.parents(node)
2181 # self.p1, self.p2 = store.parents(node)
2173 # rev = store.rev(node)
2182 # rev = store.rev(node)
2174 # self.linkrev = store.linkrev(rev)
2183 # self.linkrev = store.linkrev(rev)
2175
2184
2176 def _storage(self):
2185 def _storage(self):
2177 narrowmatch = self._manifestlog._narrowmatch
2186 narrowmatch = self._manifestlog._narrowmatch
2178 if not narrowmatch.always():
2187 if not narrowmatch.always():
2179 if not narrowmatch.visitdir(self._dir[:-1]):
2188 if not narrowmatch.visitdir(self._dir[:-1]):
2180 return excludedmanifestrevlog(
2189 return excludedmanifestrevlog(
2181 self._manifestlog.nodeconstants, self._dir
2190 self._manifestlog.nodeconstants, self._dir
2182 )
2191 )
2183 return self._manifestlog.getstorage(self._dir)
2192 return self._manifestlog.getstorage(self._dir)
2184
2193
2185 def read(self):
2194 def read(self):
2186 if self._data is None:
2195 if self._data is None:
2187 store = self._storage()
2196 store = self._storage()
2188 if self._node == self._manifestlog.nodeconstants.nullid:
2197 if self._node == self._manifestlog.nodeconstants.nullid:
2189 self._data = treemanifest(self._manifestlog.nodeconstants)
2198 self._data = treemanifest(self._manifestlog.nodeconstants)
2190 # TODO accessing non-public API
2199 # TODO accessing non-public API
2191 elif store._treeondisk:
2200 elif store._treeondisk:
2192 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2201 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2193
2202
2194 def gettext():
2203 def gettext():
2195 return store.revision(self._node)
2204 return store.revision(self._node)
2196
2205
2197 def readsubtree(dir, subm):
2206 def readsubtree(dir, subm):
2198 # Set verify to False since we need to be able to create
2207 # Set verify to False since we need to be able to create
2199 # subtrees for trees that don't exist on disk.
2208 # subtrees for trees that don't exist on disk.
2200 return self._manifestlog.get(dir, subm, verify=False).read()
2209 return self._manifestlog.get(dir, subm, verify=False).read()
2201
2210
2202 m.read(gettext, readsubtree)
2211 m.read(gettext, readsubtree)
2203 m.setnode(self._node)
2212 m.setnode(self._node)
2204 self._data = m
2213 self._data = m
2205 else:
2214 else:
2206 if self._node in store.fulltextcache:
2215 if self._node in store.fulltextcache:
2207 text = pycompat.bytestr(store.fulltextcache[self._node])
2216 text = pycompat.bytestr(store.fulltextcache[self._node])
2208 else:
2217 else:
2209 text = store.revision(self._node)
2218 text = store.revision(self._node)
2210 arraytext = bytearray(text)
2219 arraytext = bytearray(text)
2211 store.fulltextcache[self._node] = arraytext
2220 store.fulltextcache[self._node] = arraytext
2212 self._data = treemanifest(
2221 self._data = treemanifest(
2213 self._manifestlog.nodeconstants, dir=self._dir, text=text
2222 self._manifestlog.nodeconstants, dir=self._dir, text=text
2214 )
2223 )
2215
2224
2216 return self._data
2225 return self._data
2217
2226
2218 def node(self):
2227 def node(self):
2219 return self._node
2228 return self._node
2220
2229
2221 def copy(self):
2230 def copy(self):
2222 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2231 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2223 memmf._treemanifest = self.read().copy()
2232 memmf._treemanifest = self.read().copy()
2224 return memmf
2233 return memmf
2225
2234
2226 @propertycache
2235 @propertycache
2227 def parents(self):
2236 def parents(self):
2228 return self._storage().parents(self._node)
2237 return self._storage().parents(self._node)
2229
2238
2230 def readdelta(self, shallow=False):
2239 def readdelta(self, shallow=False):
2231 """Returns a manifest containing just the entries that are present
2240 """Returns a manifest containing just the entries that are present
2232 in this manifest, but not in its p1 manifest. This is efficient to read
2241 in this manifest, but not in its p1 manifest. This is efficient to read
2233 if the revlog delta is already p1.
2242 if the revlog delta is already p1.
2234
2243
2235 If `shallow` is True, this will read the delta for this directory,
2244 If `shallow` is True, this will read the delta for this directory,
2236 without recursively reading subdirectory manifests. Instead, any
2245 without recursively reading subdirectory manifests. Instead, any
2237 subdirectory entry will be reported as it appears in the manifest, i.e.
2246 subdirectory entry will be reported as it appears in the manifest, i.e.
2238 the subdirectory will be reported among files and distinguished only by
2247 the subdirectory will be reported among files and distinguished only by
2239 its 't' flag.
2248 its 't' flag.
2240 """
2249 """
2241 store = self._storage()
2250 store = self._storage()
2242 if shallow:
2251 if shallow:
2243 r = store.rev(self._node)
2252 r = store.rev(self._node)
2244 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2253 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2245 return manifestdict(store.nodeconstants.nodelen, d)
2254 return manifestdict(store.nodeconstants.nodelen, d)
2246 else:
2255 else:
2247 # Need to perform a slow delta
2256 # Need to perform a slow delta
2248 r0 = store.deltaparent(store.rev(self._node))
2257 r0 = store.deltaparent(store.rev(self._node))
2249 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2258 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2250 m1 = self.read()
2259 m1 = self.read()
2251 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2260 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2252 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2261 for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
2253 if n1:
2262 if n1:
2254 md[f] = n1
2263 md[f] = n1
2255 if fl1:
2264 if fl1:
2256 md.setflag(f, fl1)
2265 md.setflag(f, fl1)
2257 return md
2266 return md
2258
2267
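
# --- illustrative aside (not part of the diff) ---
# Toy version of the slow-delta loop above: given a diff mapping of
# path -> ((old_node, old_flag), (new_node, new_flag)), keep only the
# entries that exist on the "new" side. Plain dicts stand in for
# manifest objects; the node and flag values are fabricated.
diff = {
    b'a': ((b'n0', b''), (b'n1', b'')),  # modified
    b'b': ((b'n2', b''), (None, b'')),   # removed -> skipped
    b'c': ((None, b''), (b'n3', b'x')),  # added, with a flag
}
md = {}
flags = {}
for f, ((n0, fl0), (n1, fl1)) in diff.items():
    if n1:
        md[f] = n1
        if fl1:
            flags[f] = fl1
assert md == {b'a': b'n1', b'c': b'n3'}
assert flags == {b'c': b'x'}
# --- end aside ---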
2259 def readfast(self, shallow=False):
2268 def readfast(self, shallow=False):
2260 """Calls either readdelta or read, based on which would be less work.
2269 """Calls either readdelta or read, based on which would be less work.
2261 readdelta is called if the delta is against the p1, and therefore can be
2270 readdelta is called if the delta is against the p1, and therefore can be
2262 read quickly.
2271 read quickly.
2263
2272
2264 If `shallow` is True, it only returns the entries from this manifest,
2273 If `shallow` is True, it only returns the entries from this manifest,
2265 and not any submanifests.
2274 and not any submanifests.
2266 """
2275 """
2267 store = self._storage()
2276 store = self._storage()
2268 r = store.rev(self._node)
2277 r = store.rev(self._node)
2269 deltaparent = store.deltaparent(r)
2278 deltaparent = store.deltaparent(r)
2270 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2279 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2271 return self.readdelta(shallow=shallow)
2280 return self.readdelta(shallow=shallow)
2272
2281
2273 if shallow:
2282 if shallow:
2274 return manifestdict(
2283 return manifestdict(
2275 store.nodeconstants.nodelen, store.revision(self._node)
2284 store.nodeconstants.nodelen, store.revision(self._node)
2276 )
2285 )
2277 else:
2286 else:
2278 return self.read()
2287 return self.read()
2279
2288
2280 def find(self, key):
2289 def find(self, key):
2281 return self.read().find(key)
2290 return self.read().find(key)
2282
2291
2283
2292
2284 class excludeddir(treemanifest):
2293 class excludeddir(treemanifest):
2285 """Stand-in for a directory that is excluded from the repository.
2294 """Stand-in for a directory that is excluded from the repository.
2286
2295
2287 With narrowing active on a repository that uses treemanifests,
2296 With narrowing active on a repository that uses treemanifests,
2288 some of the directory revlogs will be excluded from the resulting
2297 some of the directory revlogs will be excluded from the resulting
2289 clone. This is a huge storage win for clients, but means we need
2298 clone. This is a huge storage win for clients, but means we need
2290 some sort of pseudo-manifest to surface to internals so we can
2299 some sort of pseudo-manifest to surface to internals so we can
2291 detect a merge conflict outside the narrowspec. That's what this
2300 detect a merge conflict outside the narrowspec. That's what this
2292 class is: it stands in for a directory whose node is known, but
2301 class is: it stands in for a directory whose node is known, but
2293 whose contents are unknown.
2302 whose contents are unknown.
2294 """
2303 """
2295
2304
2296 def __init__(self, nodeconstants, dir, node):
2305 def __init__(self, nodeconstants, dir, node):
2297 super(excludeddir, self).__init__(nodeconstants, dir)
2306 super(excludeddir, self).__init__(nodeconstants, dir)
2298 self._node = node
2307 self._node = node
2299 # Add an empty file, which will be included by iterators and such,
2308 # Add an empty file, which will be included by iterators and such,
2300 # appearing as the directory itself (i.e. something like "dir/")
2309 # appearing as the directory itself (i.e. something like "dir/")
2301 self._files[b''] = node
2310 self._files[b''] = node
2302 self._flags[b''] = b't'
2311 self._flags[b''] = b't'
2303
2312
    # Manifests outside the narrowspec should never be modified, so avoid
    # copying. This makes a noticeable difference when there are very many
    # directories outside the narrowspec. Also, it makes sense for the copy to
    # be of the same type as the original, which would not happen with the
    # super type's copy().
    def copy(self):
        return self


class excludeddirmanifestctx(treemanifestctx):
    """context wrapper for excludeddir - see that docstring for rationale"""

    def __init__(self, nodeconstants, dir, node):
        self.nodeconstants = nodeconstants
        self._dir = dir
        self._node = node

    def read(self):
        return excludeddir(self.nodeconstants, self._dir, self._node)

    def readfast(self, shallow=False):
        # special version of readfast since we don't have underlying storage
        return self.read()

    def write(self, *args):
        raise error.ProgrammingError(
            b'attempt to write manifest from excluded dir %s' % self._dir
        )


class excludedmanifestrevlog(manifestrevlog):
    """Stand-in for excluded treemanifest revlogs.

    When narrowing is active on a treemanifest repository, we'll have
    references to directories we can't see due to the revlog being
    skipped. This class exists to conform to the manifestrevlog
    interface for those directories and proactively prevent writes to
    outside the narrowspec.
    """

    def __init__(self, nodeconstants, dir):
        self.nodeconstants = nodeconstants
        self._dir = dir

    def __len__(self):
        raise error.ProgrammingError(
            b'attempt to get length of excluded dir %s' % self._dir
        )

    def rev(self, node):
        raise error.ProgrammingError(
            b'attempt to get rev from excluded dir %s' % self._dir
        )

    def linkrev(self, node):
        raise error.ProgrammingError(
            b'attempt to get linkrev from excluded dir %s' % self._dir
        )

    def node(self, rev):
        raise error.ProgrammingError(
            b'attempt to get node from excluded dir %s' % self._dir
        )

    def add(self, *args, **kwargs):
        # We should never write entries in dirlogs outside the narrow clone.
        # However, the method still gets called from writesubtree() in
        # _addtree(), so we need to handle it. We should possibly make that
        # avoid calling add() with a clean manifest (_dirty is always False
        # in excludeddir instances).
        pass
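
# Editor's note (illustrative, not part of the original file): the excluded*
# stand-ins above make reads cheap no-ops and turn writes into hard
# ProgrammingErrors, so a narrow clone fails loudly rather than silently
# touching trees outside the narrowspec. Assuming ``mctx`` is an
# excludeddirmanifestctx:
#
#     mctx.read()       # returns an excludeddir stand-in
#     mctx.write()      # raises error.ProgrammingError
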
@@ -1,3406 +1,3410 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

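
# Editor's note (illustrative, not part of the original file): a flag
# processor is a (read, write, raw) triple like ``ellipsisprocessor`` above:
# ``read`` and ``write`` return a ``(text, validatehash)`` pair, and ``raw``
# returns a bool used during raw-text hash validation. A hypothetical no-op
# processor for a placeholder flag ``SOME_FLAG`` would be registered the same
# way _init_opts() hooks up the ellipsis one further down:
#
#     def _noop(rl, text):
#         return text, False
#
#     flagutil.insertflagprocessor(
#         SOME_FLAG, (_noop, _noop, lambda rl, text: False), processors
#     )
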
def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache

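
# Editor's note (illustrative, not part of the original file): every
# parse_index_* variant above shares one contract: raw index bytes plus the
# inline flag in, an ``(index, cache)`` pair out, e.g.:
#
#     index, cache = parse_index_v1(data, inline)
#
# which is what lets _loadindex() below select an implementation with a
# plain ``self._parse_index = ...`` assignment.
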
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        that test, debug, or performance measurement code might not set it to
        an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as a flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

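    # Editor's note (illustrative, not part of the original file): a minimal
    # instantiation, with placeholder arguments, might look like
    #
    #     rl = revlog(
    #         opener, target=(KIND_CHANGELOG, None), radix=b'00changelog'
    #     )
    #
    # where ``opener`` is a vfs-style callable and ``radix`` names the
    # ``.i``/``.d`` file pair it manages.
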
    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

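    # Editor's note (illustrative, not part of the original file): the
    # ``size & (size - 1)`` check above is the classic power-of-two test;
    # a power of two has a single bit set, so subtracting one clears it:
    #
    #     65536 & 65535  ==  0x10000 & 0x0FFFF  ==  0   # power of two
    #     65537 & 65536  ==  0x10001 & 0x10000  !=  0   # rejected
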
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing, return an empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
            entry_point = b'%s.i.s' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

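    # Editor's note (illustrative, not part of the original file): the entry
    # point probed at the top of _loadindex() resolves to one of:
    #
    #     b'<radix>.i.<postfix>'   explicit postfix (e.g. tmpcensored)
    #     b'<radix>.i.a'           pending data, visible inside a transaction
    #     b'<radix>.i.s'           leftover of an in-progress inline split
    #     b'<radix>.i'             the regular index file
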
    def get_revlog(self):
        """simple accessor returning this revlog itself, mirroring the
        ``get_revlog`` API of storage objects that are not actually
        revlogs"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

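    # Editor's note (illustrative, not part of the original file): chunk
    # decompression dispatches on a short header, which is why
    # _get_decompressor() caches compressors keyed on ``t``; for instance a
    # chunk starting with b'x' is zlib-compressed while b'u' marks data
    # stored uncompressed, and an unrecognized header surfaces as the
    # RevlogError above.
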
    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

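    # Editor's note (illustrative, not part of the original file): ``stop``
    # is inclusive here, so, assuming a revlog ``rl`` with at least six
    # revisions:
    #
    #     list(rl.revs())        # [0, 1, ..., len(rl) - 1]
    #     list(rl.revs(2, 5))    # [2, 3, 4, 5]
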
    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

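    # Editor's note (illustrative, not part of the original file): unpacking
    # the first index field, per the comment above:
    #
    #     packed = self.index[rev][0]
    #     offset = packed >> 16       # 48-bit position in the data file
    #     flags = packed & 0xFFFF     # 16 bits of REVIDX_* flags
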
    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

885 def fast_rank(self, rev):
889 def fast_rank(self, rev):
886 """Return the rank of a revision if already known, or None otherwise.
890 """Return the rank of a revision if already known, or None otherwise.
887
891
888 The rank of a revision is the size of the sub-graph it defines as a
892 The rank of a revision is the size of the sub-graph it defines as a
889 head. Equivalently, the rank of a revision `r` is the size of the set
893 head. Equivalently, the rank of a revision `r` is the size of the set
890 `ancestors(r)`, `r` included.
894 `ancestors(r)`, `r` included.
891
895
892 This method returns the rank retrieved from the revlog in constant
896 This method returns the rank retrieved from the revlog in constant
893 time. It makes no attempt at computing unknown values for versions of
897 time. It makes no attempt at computing unknown values for versions of
894 the revlog which do not persist the rank.
898 the revlog which do not persist the rank.
895 """
899 """
896 rank = self.index[rev][ENTRY_RANK]
900 rank = self.index[rev][ENTRY_RANK]
897 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
901 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
898 return None
902 return None
899 if rev == nullrev:
903 if rev == nullrev:
900 return 0 # convention
904 return 0 # convention
901 return rank
905 return rank
902
906
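    # Illustrative sketch (not part of the original source): `fast_rank`
    # above only reads a stored value. Per its docstring, the same number
    # could always be recomputed, much more slowly, by walking the parent
    # graph, as in this hypothetical helper:
    def _rank_by_walking(self, rev):
        seen = {rev}  # ancestors(rev), rev included
        stack = [rev]
        while stack:
            for p in self.parentrevs(stack.pop()):
                if p != nullrev and p not in seen:
                    seen.add(p)
                    stack.append(p)
        return len(seen)
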
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

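    # Illustrative sketch (not part of the original source): one way to use
    # `_deltachain`. The chain starts at a full snapshot and each later rev
    # stores a binary delta, so the raw text can be rebuilt by patching;
    # the real, more careful reconstruction lives in `_revisiondata`.
    def _rebuild_from_chain(self, rev):
        chain, stopped = self._deltachain(rev)
        assert not stopped  # no stoprev given, so the chain is complete
        text = self.rawdata(chain[0])  # chain[0] holds a full snapshot
        deltas = self._chunks(chain[1:])
        if deltas:
            text = mdiff.patches(text, deltas)
        return text
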
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

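    # Illustrative sketch (not part of the original source): the contract of
    # `findmissingrevs` in brute-force form. The incremental object above
    # should agree with a naive set difference over inclusive ancestors:
    def _missingrevs_bruteforce(self, common, heads):
        anc_heads = set(self.ancestors(heads, inclusive=True))
        anc_common = set(self.ancestors(common, inclusive=True))
        return sorted(anc_heads - anc_common)
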
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

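    # Illustrative check (not part of the original source): a consequence of
    # the marking scheme in `_headrevs` is that no reported head may appear
    # as a parent of any iterated revision.
    def _check_headrevs(self):
        heads = set(self._headrevs())
        for r in self:
            for p in self.parentrevs(r):
                assert p not in heads  # a revision with a child is not a head
        return True
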
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

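    # Illustrative usage of `lookup` (not part of the original source; the
    # hex prefix below is hypothetical):
    #
    #   rl.lookup(5)          -> node of revision 5
    #   rl.lookup(b'5')       -> same, parsed as str(revision number)
    #   rl.lookup(b'1f0e5c')  -> unique hex-prefix match via _partialmatch,
    #                            or AmbiguousPrefixLookupError/LookupError
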
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

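    # Illustrative behaviour of `shortest` (not part of the original source;
    # values are hypothetical): given a node whose hex form starts with
    # b'1f0e...', `rl.shortest(node)` might return b'1f0' if no other node
    # shares that prefix, and `rl.shortest(node, minlength=6)` returns at
    # least 6 digits. For ordinary nodes, all-'f' prefixes are lengthened so
    # the result is never mistaken for the wdir pseudo-identifier.
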
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

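    # For reference (editorial note, not in the original source): the
    # per-revision compression mode read from index[rev][10] selects how
    # `_chunk` and `_chunks` decode a raw segment:
    #   COMP_MODE_PLAIN   - stored uncompressed; returned as-is
    #   COMP_MODE_INLINE  - self-describing header byte; decoded by
    #                       self.decompress
    #   COMP_MODE_DEFAULT - compressed with the revlog's configured engine;
    #                       decoded by self._decompressor
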
1703 def _chunks(self, revs, df=None, targetsize=None):
1707 def _chunks(self, revs, df=None, targetsize=None):
1704 """Obtain decompressed chunks for the specified revisions.
1708 """Obtain decompressed chunks for the specified revisions.
1705
1709
1706 Accepts an iterable of numeric revisions that are assumed to be in
1710 Accepts an iterable of numeric revisions that are assumed to be in
1707 ascending order. Also accepts an optional already-open file handle
1711 ascending order. Also accepts an optional already-open file handle
1708 to be used for reading. If used, the seek position of the file will
1712 to be used for reading. If used, the seek position of the file will
1709 not be preserved.
1713 not be preserved.
1710
1714
1711 This function is similar to calling ``self._chunk()`` multiple times,
1715 This function is similar to calling ``self._chunk()`` multiple times,
1712 but is faster.
1716 but is faster.
1713
1717
1714 Returns a list with decompressed data for each requested revision.
1718 Returns a list with decompressed data for each requested revision.
1715 """
1719 """
1716 if not revs:
1720 if not revs:
1717 return []
1721 return []
1718 start = self.start
1722 start = self.start
1719 length = self.length
1723 length = self.length
1720 inline = self._inline
1724 inline = self._inline
1721 iosize = self.index.entry_size
1725 iosize = self.index.entry_size
1722 buffer = util.buffer
1726 buffer = util.buffer
1723
1727
1724 l = []
1728 l = []
1725 ladd = l.append
1729 ladd = l.append
1726
1730
1727 if not self._withsparseread:
1731 if not self._withsparseread:
1728 slicedchunks = (revs,)
1732 slicedchunks = (revs,)
1729 else:
1733 else:
1730 slicedchunks = deltautil.slicechunk(
1734 slicedchunks = deltautil.slicechunk(
1731 self, revs, targetsize=targetsize
1735 self, revs, targetsize=targetsize
1732 )
1736 )
1733
1737
1734 for revschunk in slicedchunks:
1738 for revschunk in slicedchunks:
1735 firstrev = revschunk[0]
1739 firstrev = revschunk[0]
1736 # Skip trailing revisions with empty diff
1740 # Skip trailing revisions with empty diff
1737 for lastrev in revschunk[::-1]:
1741 for lastrev in revschunk[::-1]:
1738 if length(lastrev) != 0:
1742 if length(lastrev) != 0:
1739 break
1743 break
1740
1744
1741 try:
1745 try:
1742 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1746 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1743 except OverflowError:
1747 except OverflowError:
1744 # issue4215 - we can't cache a run of chunks greater than
1748 # issue4215 - we can't cache a run of chunks greater than
1745 # 2G on Windows
1749 # 2G on Windows
1746 return [self._chunk(rev, df=df) for rev in revschunk]
1750 return [self._chunk(rev, df=df) for rev in revschunk]
1747
1751
1748 decomp = self.decompress
1752 decomp = self.decompress
1749 # self._decompressor might be None, but will not be used in that case
1753 # self._decompressor might be None, but will not be used in that case
1750 def_decomp = self._decompressor
1754 def_decomp = self._decompressor
1751 for rev in revschunk:
1755 for rev in revschunk:
1752 chunkstart = start(rev)
1756 chunkstart = start(rev)
1753 if inline:
1757 if inline:
1754 chunkstart += (rev + 1) * iosize
1758 chunkstart += (rev + 1) * iosize
1755 chunklength = length(rev)
1759 chunklength = length(rev)
1756 comp_mode = self.index[rev][10]
1760 comp_mode = self.index[rev][10]
1757 c = buffer(data, chunkstart - offset, chunklength)
1761 c = buffer(data, chunkstart - offset, chunklength)
1758 if comp_mode == COMP_MODE_PLAIN:
1762 if comp_mode == COMP_MODE_PLAIN:
1759 ladd(c)
1763 ladd(c)
1760 elif comp_mode == COMP_MODE_INLINE:
1764 elif comp_mode == COMP_MODE_INLINE:
1761 ladd(decomp(c))
1765 ladd(decomp(c))
1762 elif comp_mode == COMP_MODE_DEFAULT:
1766 elif comp_mode == COMP_MODE_DEFAULT:
1763 ladd(def_decomp(c))
1767 ladd(def_decomp(c))
1764 else:
1768 else:
1765 msg = b'unknown compression mode %d'
1769 msg = b'unknown compression mode %d'
1766 msg %= comp_mode
1770 msg %= comp_mode
1767 raise error.RevlogError(msg)
1771 raise error.RevlogError(msg)
1768
1772
1769 return l
1773 return l
1770
1774
1771 def deltaparent(self, rev):
1775 def deltaparent(self, rev):
1772 """return deltaparent of the given revision"""
1776 """return deltaparent of the given revision"""
1773 base = self.index[rev][3]
1777 base = self.index[rev][3]
1774 if base == rev:
1778 if base == rev:
1775 return nullrev
1779 return nullrev
1776 elif self._generaldelta:
1780 elif self._generaldelta:
1777 return base
1781 return base
1778 else:
1782 else:
1779 return rev - 1
1783 return rev - 1
1780
1784
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

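    # Illustrative sketch (not from the original source): with sparse-revlog,
    # snapshots may themselves be deltas against earlier snapshots, so a
    # delta chain can look like
    #
    #   full snapshot (depth 0) <- intermediate snapshot (depth 1) <- deltas
    #
    # `snapshotdepth(rev)` counts the snapshots that precede `rev` in its own
    # chain.
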
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

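    # Illustrative sketch (not part of the original source): how a raw text is
    # rebuilt from its delta chain, assuming `rl` is an open revlog:
    #
    #   chain, stopped = rl._deltachain(rev)         # revs from base to rev
    #   bins = rl._chunks(chain)                     # one chunk per rev
    #   basetext = bytes(bins[0])                    # chain starts at a full text
    #   rawtext = mdiff.patches(basetext, bins[1:])  # apply deltas in order
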
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

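    # Illustrative usage sketch (not part of the original source): `revision`
    # returns the flag-processed text while `rawdata` returns the bytes as
    # stored; the two can differ when extra revision flags trigger flag
    # processors. Assumes `rl` is an open revlog and `node` a known node id:
    #
    #   text = rl.revision(node)  # uncompressed, flag processors applied
    #   raw = rl.rawdata(node)    # uncompressed, exactly as stored
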
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

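    # Hedged sketch of the node hash convention (an assumption about
    # storageutil.hashrevisionsha1, whose definition lives outside this file):
    # the node id is the SHA-1 of the two parent nodes in sorted order
    # followed by the revision text.
    #
    #   import hashlib
    #   def sketch_hash(text, p1, p2):   # hypothetical helper
    #       a, b = sorted((p1, p2))
    #       return hashlib.sha1(a + b + text).digest()
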
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._indexfile + b'.s'
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
                with self.__index_new_fp() as fp:
                    self._format_flags &= ~FLAG_INLINE_DATA
                    self._inline = False
                    for i in self:
                        e = self.index.entry_binary(i)
                        if i == 0 and self._docket is None:
                            header = self._format_flags | self._format_version
                            header = self.index.pack_header(header)
                            e = header + e
                        fp.write(e)
                    if self._docket is not None:
                        self._docket.index_end = fp.tell()

                    # If we don't use side-write, the temp file replaces the
                    # real index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

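    # Illustrative usage sketch (not part of the original source): batching
    # several reads under one `reading()` context keeps the data and sidedata
    # files open instead of reopening them for every access.
    #
    #   with rl.reading():
    #       for rev in revs:
    #           process(rl.revision(rev))   # `process` is a hypothetical consumer
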
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

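    # Illustrative sketch (derived from the code above, not part of the
    # original source): inside `_writing`, the handles are exposed as a triple
    #
    #   ifh, dfh, sdfh = self._writinghandles
    #
    # where `dfh` stays None for inline revlogs (no separate data file) and
    # `sdfh` is only set when a sidedata file exists (revlog-v2).
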
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash() in
            that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

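    # Illustrative usage sketch (not part of the original source; `tr` is an
    # open transaction and `linkrev` a valid changelog revision):
    #
    #   rev = rl.addrevision(b'file content', tr, linkrev, p1node, p2node)
    #   node = rl.node(rev)
    #
    # If the node is already stored, the existing revision number is returned
    # and nothing is written.
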
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

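    # Illustrative sketch (not part of the original source): the return value
    # is a (header, data) pair meant to be concatenated on disk.
    #
    #   h, d = rl.compress(b'some text')
    #   # h == b''  -> d is compressed or starts with b'\0' (self-describing)
    #   # h == b'u' -> d is the literal uncompressed text
    #   chunk = h + d
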
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

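    # Dispatch summary (derived from the code above): the first byte of a
    # chunk selects the handling.
    #
    #   b'x'  -> zlib-compressed; inflated via _zlibdecompress
    #   b'\0' -> stored plain; returned as-is
    #   b'u'  -> uncompressed payload; the one-byte header is stripped
    #   other -> routed through self._get_decompressor(t)
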
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

2613 def _writeentry(
2617 def _writeentry(
2614 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2618 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2615 ):
2619 ):
2616 # Files opened in a+ mode have inconsistent behavior on various
2620 # Files opened in a+ mode have inconsistent behavior on various
2617 # platforms. Windows requires that a file positioning call be made
2621 # platforms. Windows requires that a file positioning call be made
2618 # when the file handle transitions between reads and writes. See
2622 # when the file handle transitions between reads and writes. See
2619 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2623 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2620 # platforms, Python or the platform itself can be buggy. Some versions
2624 # platforms, Python or the platform itself can be buggy. Some versions
2621 # of Solaris have been observed to not append at the end of the file
2625 # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

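    # The append discipline above boils down, per handle, to the following
    # minimal sketch (``fh`` stands in for any of the three handles and
    # ``known_end`` for the docket-tracked end offset; both names are
    # illustrative, not part of this module):
    #
    #     fh.seek(known_end, os.SEEK_SET)  # never trust the current cursor
    #     fh.write(chunk)                  # append at the recorded end
    #     known_end = fh.tell()            # remember the new end of file
    #
    # which is why the docket offsets are refreshed from tell() right after
    # writing.
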
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        Add a delta group.

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the rest
        are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

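                    # For reference, ``mdiff.replacediffheader(oldlen, newlen)``
                    # packs the one-hunk "replace everything" header, roughly:
                    #
                    #     struct.pack(b">lll", 0, oldlen, newlen)
                    #
                    # i.e. the delta must rewrite bytes [0, oldlen) with
                    # ``newlen`` bytes of new content in a single operation.
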
                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

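    # A rough, pure-Python sketch of what the strip point means (the real
    # implementation in storageutil also walks parents from the heads):
    #
    #     striprev = min(r for r in rl if rl.linkrev(r) >= minlink)
    #     brokenrevs = {r for r in rl
    #                   if r >= striprev and rl.linkrev(r) < minlink}
    #
    # Everything from ``striprev`` onward gets truncated; ``brokenrevs`` are
    # the revisions removed even though their own linkrev was safe.
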
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

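    # Usage sketch: this is how consistency checks spot truncated or
    # over-long revlog files:
    #
    #     dd, di = rl.checksize()
    #     if (dd, di) != (0, 0):
    #         print('data off by %d bytes, index off by %d' % (dd, di))
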
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. If unset, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

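            # Index entry layout reminder for the raw lookups above:
            # entry[0] packs offset and flags, entry[4] is the linkrev,
            # entry[5]/entry[6] are the parent *revs* (hence the extra
            # ``index[...][7]`` hop to turn them into nodes) and entry[7]
            # is the node itself.
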
            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

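                # ``new_flags`` is a (flags to add, flags to remove) pair;
                # since ``&`` binds tighter than ``|``, the expression above
                # reads as flags | (new_flags[0] & ~new_flags[1]).
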
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #       header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------
            #  flags()             | 0      | 0      | 0     | not 0
            #  renamed()           | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #   1. length check: L1 == L2, in all cases.
            #   2. hash check: depending on flag processor, we may need to
            #      use either "text" (external), or "rawtext" (in revlog).

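            # The b'\1\n' metadata framing referred to above looks like
            #
            #     \1\ncopy: old/path\ncopyrev: <40 hex digits>\n\1\n<content>
            #
            # which is why len(read()) can come out as L2 - LM in the
            # rename/meta columns.
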
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

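    # Usage sketch:
    #
    #     info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #     # -> {b'revisionscount': ..., b'trackedsize': ...}
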
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
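
                # Compression-mode cheat sheet: COMP_MODE_PLAIN stores the
                # serialized sidedata verbatim; COMP_MODE_DEFAULT stores it
                # compressed with the docket's default engine, dropping the
                # per-chunk header byte; COMP_MODE_INLINE keeps the
                # self-describing compression header in the stream.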
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)