revlog: drop the df argument to `revision`...
marmoute
r51915:33d2f016 default
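
The change below is mechanical for callers: `revision` no longer accepts a
pre-opened data file handle. A minimal before/after sketch of a call site
(hypothetical `fl` filelog instance and `node`); note that `rawdata` still
carries its `_df` argument in this changeset:

    # before: callers could thread a cached file handle through
    # text = fl.revision(node, _df=some_open_data_file)
    # after: file handle management is internal to the revlog
    text = fl.revision(node)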
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
@@ -1,313 +1,313 @@
# filelog.py - file history class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from .i18n import _
from .node import nullrev
from . import (
    error,
    revlog,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .utils import storageutil
from .revlogutils import (
    constants as revlog_constants,
    rewrite,
)


@interfaceutil.implementer(repository.ifilestorage)
class filelog:
    def __init__(self, opener, path, try_split=False):
        self._revlog = revlog.revlog(
            opener,
            # XXX should use the unencoded path
            target=(revlog_constants.KIND_FILELOG, path),
            radix=b'/'.join((b'data', path)),
            censorable=True,
            canonical_parent_order=False,  # see comment in revlog.py
            try_split=try_split,
        )
        # Full name of the user visible file, relative to the repository root.
        # Used by LFS.
        self._revlog.filename = path
        self.nullid = self._revlog.nullid
        opts = opener.options
        self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)

    def get_revlog(self):
        """return an actual revlog instance if any

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving a simple
        way to access the revlog instance helps such code.
        """
        return self._revlog

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def hasnode(self, node):
        if node in (self.nullid, nullrev):
            return False

        try:
            self._revlog.rev(node)
            return True
        except (TypeError, ValueError, IndexError, error.LookupError):
            return False

    def revs(self, start=0, stop=None):
        return self._revlog.revs(start=start, stop=stop)

    def parents(self, node):
        return self._revlog.parents(node)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, node):
        return storageutil.fileidlookup(
            self._revlog, node, self._revlog.display_id
        )

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def commonancestorsheads(self, node1, node2):
        return self._revlog.commonancestorsheads(node1, node2)

    # Used by dagop.blockdescendants().
    def descendants(self, revs):
        return self._revlog.descendants(revs)

    def heads(self, start=None, stop=None):
        return self._revlog.heads(start, stop)

    # Used by hgweb, children extension.
    def children(self, node):
        return self._revlog.children(node)

    def iscensored(self, rev):
        return self._revlog.iscensored(rev)

-    def revision(self, node, _df=None):
-        return self._revlog.revision(node, _df=_df)
+    def revision(self, node):
+        return self._revlog.revision(node)

    def rawdata(self, node, _df=None):
        return self._revlog.rawdata(node, _df=_df)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    def addrevision(
        self,
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=revlog.REVIDX_DEFAULT_FLAGS,
        cachedelta=None,
    ):
        return self._revlog.addrevision(
            revisiondata,
            transaction,
            linkrev,
            p1,
            p2,
            node=node,
            flags=flags,
            cachedelta=cachedelta,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        if maybemissingparents:
            raise error.Abort(
                _(
                    b'revlog storage does not support missing '
                    b'parents write mode'
                )
            )

        with self._revlog._writing(transaction):

            if self._fix_issue6528:
                deltas = rewrite.filter_delta_issue6528(self._revlog, deltas)

            return self._revlog.addgroup(
                deltas,
                linkmapper,
                transaction,
                addrevisioncb=addrevisioncb,
                duplicaterevisioncb=duplicaterevisioncb,
                debug_info=debug_info,
                delta_base_reuse_policy=delta_base_reuse_policy,
            )

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def censorrevision(self, tr, node, tombstone=b''):
        return self._revlog.censorrevision(tr, node, tombstone=tombstone)

    def files(self):
        return self._revlog.files()

    def read(self, node):
        return storageutil.filtermetadata(self.revision(node))

    def add(self, text, meta, transaction, link, p1=None, p2=None):
        if meta or text.startswith(b'\1\n'):
            text = storageutil.packmeta(meta, text)
        rev = self.addrevision(text, transaction, link, p1, p2)
        return self.node(rev)

    def renamed(self, node):
        return storageutil.filerevisioncopied(self, node)

    def size(self, rev):
        """return the size of a given revision"""

        # for revisions with renames, we have to go the slow way
        node = self.node(rev)
        if self.iscensored(rev):
            return 0
        if self.renamed(node):
            return len(self.read(node))

        # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
        # XXX See also basefilectx.cmp.
        return self._revlog.size(rev)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        return not storageutil.filedataequivalent(self, node, text)

    def verifyintegrity(self, state):
        return self._revlog.verifyintegrity(state)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    # Used by repo upgrade.
    def clone(self, tr, destrevlog, **kwargs):
        if not isinstance(destrevlog, filelog):
            msg = b'expected filelog to clone(), not %r'
            msg %= destrevlog
            raise error.ProgrammingError(msg)

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)


class narrowfilelog(filelog):
    """Filelog variation to be used with narrow stores."""

    def __init__(self, opener, path, narrowmatch, try_split=False):
        super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
        self._narrowmatch = narrowmatch

    def renamed(self, node):
        res = super(narrowfilelog, self).renamed(node)

        # Renames that come from outside the narrowspec are problematic
        # because we may lack the base text for the rename. This can result
        # in code attempting to walk the ancestry or compute a diff
        # encountering a missing revision. We address this by silently
        # removing rename metadata if the source file is outside the
        # narrow spec.
        #
        # A better solution would be to see if the base revision is available,
        # rather than assuming it isn't.
        #
        # An even better solution would be to teach all consumers of rename
        # metadata that the base revision may not be available.
        #
        # TODO consider better ways of doing this.
        if res and not self._narrowmatch(res[0]):
            return None

        return res

    def size(self, rev):
        # Because we have a custom renamed() that may lie, we need to call
        # the base renamed() to report accurate results.
        node = self.node(rev)
        if super(narrowfilelog, self).renamed(node):
            return len(self.read(node))
        else:
            return super(narrowfilelog, self).size(rev)

    def cmp(self, node, text):
        # We don't call `super` because narrow parents can be buggy in case of
        # an ambiguous dirstate. Always take the slow path until there is a
        # better fix, see issue6150.

        # Censored files compare against the empty file.
        if self.iscensored(self.rev(node)):
            return text != b''

        return self.read(node) != text
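
An aside on the `add()`/`read()` pair above: copy metadata is framed into the
stored fulltext and stripped back off on read. A hedged sketch of the framing
that `storageutil.packmeta` produces and `storageutil.filtermetadata` removes
(illustrative values):

    meta = {b'copy': b'old/name', b'copyrev': b'0' * 40}
    text = b'file contents\n'
    # the packed fulltext brackets "key: value" lines with \x01\n markers:
    # b'\x01\ncopy: old/name\ncopyrev: 000...000\n\x01\nfile contents\n'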
diff --git a/mercurial/interfaces/repository.py b/mercurial/interfaces/repository.py
@@ -1,2079 +1,2079 @@
# repository.py - Interfaces and base classes for repositories and peers.
# coding: utf-8
#
# Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from ..i18n import _
from .. import error
from . import util as interfaceutil

# Local repository feature string.

# Revlogs are being used for file storage.
REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
# The storage part of the repository is shared from an external source.
REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
# LFS supported for backing file storage.
REPO_FEATURE_LFS = b'lfs'
# Repository supports being stream cloned.
REPO_FEATURE_STREAM_CLONE = b'streamclone'
# Repository supports storing (at least) some sidedata.
REPO_FEATURE_SIDE_DATA = b'side-data'
# Files storage may lack data for all ancestors.
REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'

REVISION_FLAG_CENSORED = 1 << 15
REVISION_FLAG_ELLIPSIS = 1 << 14
REVISION_FLAG_EXTSTORED = 1 << 13
REVISION_FLAG_HASCOPIESINFO = 1 << 12

REVISION_FLAGS_KNOWN = (
    REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO
)

CG_DELTAMODE_STD = b'default'
CG_DELTAMODE_PREV = b'previous'
CG_DELTAMODE_FULL = b'fulltext'
CG_DELTAMODE_P1 = b'p1'


## Cache related constants:
#
# Used to control which caches should be warmed in a repo.updatecaches(…) call.

# Warm branchmaps of all known repoview's filter-level
CACHE_BRANCHMAP_ALL = b"branchmap-all"
# Warm branchmaps of repoview's filter-level used by server
CACHE_BRANCHMAP_SERVED = b"branchmap-served"
# Warm internal changelog cache (eg: persistent nodemap)
CACHE_CHANGELOG_CACHE = b"changelog-cache"
# Warm full manifest cache
CACHE_FULL_MANIFEST = b"full-manifest"
# Warm file-node-tags cache
CACHE_FILE_NODE_TAGS = b"file-node-tags"
# Warm internal manifestlog cache (eg: persistent nodemap)
CACHE_MANIFESTLOG_CACHE = b"manifestlog-cache"
# Warm rev branch cache
CACHE_REV_BRANCH = b"rev-branch-cache"
# Warm tags' cache for default repoview
CACHE_TAGS_DEFAULT = b"tags-default"
# Warm tags' cache for repoview's filter-level used by server
CACHE_TAGS_SERVED = b"tags-served"

# the caches to warm by default after a simple transaction
# (this is a mutable set to let extensions update it)
CACHES_DEFAULT = {
    CACHE_BRANCHMAP_SERVED,
}

# the caches to warm when warming all of them
# (this is a mutable set to let extensions update it)
CACHES_ALL = {
    CACHE_BRANCHMAP_SERVED,
    CACHE_BRANCHMAP_ALL,
    CACHE_CHANGELOG_CACHE,
    CACHE_FILE_NODE_TAGS,
    CACHE_FULL_MANIFEST,
    CACHE_MANIFESTLOG_CACHE,
    CACHE_TAGS_DEFAULT,
    CACHE_TAGS_SERVED,
}

# the caches to warm by default after a clone
# (this is a mutable set to let extensions update it)
CACHES_POST_CLONE = CACHES_ALL.copy()
CACHES_POST_CLONE.discard(CACHE_FILE_NODE_TAGS)


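A hedged usage sketch for the constants above, assuming `repo` is a local
repository and that `repo.updatecaches()` accepts a `caches` set as the
comment suggests:

    # warm only the server-facing branchmap and tags caches
    repo.updatecaches(caches={CACHE_BRANCHMAP_SERVED, CACHE_TAGS_SERVED})
    # or warm everything named by CACHES_ALL
    repo.updatecaches(caches=CACHES_ALL)
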
class ipeerconnection(interfaceutil.Interface):
    """Represents a "connection" to a repository.

    This is the base interface for representing a connection to a repository.
    It holds basic properties and methods applicable to all peer types.

    This is not a complete interface definition and should not be used
    outside of this module.
    """

    ui = interfaceutil.Attribute("""ui.ui instance""")
    path = interfaceutil.Attribute("""a urlutil.path instance or None""")

    def url():
        """Returns a URL string representing this peer.

        Currently, implementations expose the raw URL used to construct the
        instance. It may contain credentials as part of the URL. The
        expectations of the value aren't well-defined and this could lead to
        data leakage.

        TODO audit/clean consumers and more clearly define the contents of this
        value.
        """

    def local():
        """Returns a local repository instance.

        If the peer represents a local repository, returns an object that
        can be used to interface with it. Otherwise returns ``None``.
        """

    def canpush():
        """Returns a boolean indicating if this peer can be pushed to."""

    def close():
        """Close the connection to this peer.

        This is called when the peer will no longer be used. Resources
        associated with the peer should be cleaned up.
        """


class ipeercapabilities(interfaceutil.Interface):
    """Peer sub-interface related to capabilities."""

    def capable(name):
        """Determine support for a named capability.

        Returns ``False`` if capability not supported.

        Returns ``True`` if boolean capability is supported. Returns a string
        if capability support is non-boolean.

        Capability strings may or may not map to wire protocol capabilities.
        """

    def requirecap(name, purpose):
        """Require a capability to be present.

        Raises a ``CapabilityError`` if the capability isn't present.
        """


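A hedged sketch of the capability contract described above (hypothetical
`peer` object): boolean capabilities come back as True/False, while
non-boolean capabilities return their string payload:

    if peer.capable(b'branchmap'):              # boolean capability
        pass
    value = peer.capable(b'bundle2')            # string payload, or False
    peer.requirecap(b'getbundle', b'fetch remote changes')  # may raise CapabilityError
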
class ipeercommands(interfaceutil.Interface):
    """Client-side interface for communicating over the wire protocol.

    This interface is used as a gateway to the Mercurial wire protocol.
    Methods commonly call wire protocol commands of the same name.
    """

    def branchmap():
        """Obtain heads in named branches.

        Returns a dict mapping branch name to an iterable of nodes that are
        heads on that branch.
        """

    def capabilities():
        """Obtain capabilities of the peer.

        Returns a set of string capabilities.
        """

    def get_cached_bundle_inline(path):
        """Retrieve a clonebundle across the wire.

        Returns a chunkbuffer
        """

    def clonebundles():
        """Obtains the clone bundles manifest for the repo.

        Returns the manifest as unparsed bytes.
        """

    def debugwireargs(one, two, three=None, four=None, five=None):
        """Used to facilitate debugging of arguments passed over the wire."""

    def getbundle(source, **kwargs):
        """Obtain remote repository data as a bundle.

        This command is how the bulk of repository data is transferred from
        the peer to the local repository.

        Returns a generator of bundle data.
        """

    def heads():
        """Determine all known head revisions in the peer.

        Returns an iterable of binary nodes.
        """

    def known(nodes):
        """Determine whether multiple nodes are known.

        Accepts an iterable of nodes whose presence to check for.

        Returns an iterable of booleans indicating whether the corresponding
        node at that index is known to the peer.
        """

    def listkeys(namespace):
        """Obtain all keys in a pushkey namespace.

        Returns an iterable of key names.
        """

    def lookup(key):
        """Resolve a value to a known revision.

        Returns a binary node of the resolved revision on success.
        """

    def pushkey(namespace, key, old, new):
        """Set a value using the ``pushkey`` protocol.

        Arguments correspond to the pushkey namespace and key to operate on and
        the old and new values for that key.

        Returns a string with the peer result. The value inside varies by the
        namespace.
        """

    def stream_out():
        """Obtain streaming clone data.

        Successful result should be a generator of data chunks.
        """

    def unbundle(bundle, heads, url):
        """Transfer repository data to the peer.

        This is how the bulk of data during a push is transferred.

        Returns the integer number of heads added to the peer.
        """


class ipeerlegacycommands(interfaceutil.Interface):
    """Interface for implementing support for legacy wire protocol commands.

    Wire protocol commands transition to legacy status when they are no longer
    used by modern clients. To facilitate identifying which commands are
    legacy, the interfaces are split.
    """

    def between(pairs):
        """Obtain nodes between pairs of nodes.

        ``pairs`` is an iterable of node pairs.

        Returns an iterable of iterables of nodes corresponding to each
        requested pair.
        """

    def branches(nodes):
        """Obtain ancestor changesets of specific nodes back to a branch point.

        For each requested node, the peer finds the first ancestor node that is
        a DAG root or is a merge.

        Returns an iterable of iterables with the resolved values for each node.
        """

    def changegroup(nodes, source):
        """Obtain a changegroup with data for descendants of specified nodes."""

    def changegroupsubset(bases, heads, source):
        pass


class ipeercommandexecutor(interfaceutil.Interface):
    """Represents a mechanism to execute remote commands.

    This is the primary interface for requesting that wire protocol commands
    be executed. Instances of this interface are active in a context manager
    and have a well-defined lifetime. When the context manager exits, all
    outstanding requests are waited on.
    """

    def callcommand(name, args):
        """Request that a named command be executed.

        Receives the command name and a dictionary of command arguments.

        Returns a ``concurrent.futures.Future`` that will resolve to the
        result of that command request. That exact value is left up to
        the implementation and possibly varies by command.

        Not all commands can coexist with other commands in an executor
        instance: it depends on the underlying wire protocol transport being
        used and the command itself.

        Implementations MAY call ``sendcommands()`` automatically if the
        requested command can not coexist with other commands in this executor.

        Implementations MAY call ``sendcommands()`` automatically when the
        future's ``result()`` is called. So, consumers using multiple
        commands with an executor MUST ensure that ``result()`` is not called
        until all command requests have been issued.
        """

    def sendcommands():
        """Trigger submission of queued command requests.

        Not all transports submit commands as soon as they are requested to
        run. When called, this method forces queued command requests to be
        issued. It will no-op if all commands have already been sent.

        When called, no more new commands may be issued with this executor.
        """

    def close():
        """Signal that this command request is finished.

        When called, no more new commands may be issued. All outstanding
        commands that have previously been issued are waited on before
        returning. This not only includes waiting for the futures to resolve,
        but also waiting for all response data to arrive. In other words,
        calling this waits for all on-wire state for issued command requests
        to finish.

        When used as a context manager, this method is called when exiting the
        context manager.

        This method may call ``sendcommands()`` if there are buffered commands.
        """


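A hedged usage sketch of the executor contract above (hypothetical `peer`);
per the docstring, every `callcommand()` request is issued before any
`result()` call, and exiting the context manager sends and waits:

    with peer.commandexecutor() as e:
        f_heads = e.callcommand(b'heads', {})
        f_keys = e.callcommand(b'listkeys', {b'namespace': b'bookmarks'})
    heads = f_heads.result()
    bookmarks = f_keys.result()
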
class ipeerrequests(interfaceutil.Interface):
    """Interface for executing commands on a peer."""

    limitedarguments = interfaceutil.Attribute(
        """True if the peer cannot receive large argument values for commands."""
    )

    def commandexecutor():
        """A context manager that resolves to an ipeercommandexecutor.

        The object this resolves to can be used to issue command requests
        to the peer.

        Callers should call its ``callcommand`` method to issue command
        requests.

        A new executor should be obtained for each distinct set of commands
        (possibly just a single command) that the consumer wants to execute
        as part of a single operation or round trip. This is because some
        peers are half-duplex and/or don't support persistent connections.
        For example, in the case of HTTP peers, commands sent to an executor
        represent a single HTTP request. While some peers may support multiple
        command sends over the wire per executor, consumers need to code to
        the least capable peer. So it should be assumed that command executors
        buffer called commands until they are told to send them and that each
        command executor could result in a new connection or wire-level request
        being issued.
        """


class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
    """Unified interface for peer repositories.

    All peer instances must conform to this interface.
    """


class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
    """Unified peer interface for wire protocol version 2 peers."""

    apidescriptor = interfaceutil.Attribute(
        """Data structure holding description of server API."""
    )


@interfaceutil.implementer(ipeerbase)
class peer:
    """Base class for peer repositories."""

    limitedarguments = False

    def __init__(self, ui, path=None, remotehidden=False):
        self.ui = ui
        self.path = path

    def capable(self, name):
        caps = self.capabilities()
        if name in caps:
            return True

        name = b'%s=' % name
        for cap in caps:
            if cap.startswith(name):
                return cap[len(name) :]

        return False

    def requirecap(self, name, purpose):
        if self.capable(name):
            return

        raise error.CapabilityError(
            _(
                b'cannot %s; remote repository does not support the '
                b'\'%s\' capability'
            )
            % (purpose, name)
        )


class iverifyproblem(interfaceutil.Interface):
    """Represents a problem with the integrity of the repository.

    Instances of this interface are emitted to describe an integrity issue
    with a repository (e.g. corrupt storage, missing data, etc).

    Instances are essentially messages associated with severity.
    """

    warning = interfaceutil.Attribute(
        """Message indicating a non-fatal problem."""
    )

    error = interfaceutil.Attribute("""Message indicating a fatal problem.""")

    node = interfaceutil.Attribute(
        """Revision encountering the problem.

        ``None`` means the problem doesn't apply to a single revision.
        """
    )


class irevisiondelta(interfaceutil.Interface):
    """Represents a delta between one revision and another.

    Instances convey enough information to allow a revision to be exchanged
    with another repository.

    Instances represent the fulltext revision data or a delta against
    another revision. Therefore the ``revision`` and ``delta`` attributes
    are mutually exclusive.

    Typically used for changegroup generation.
    """

    node = interfaceutil.Attribute("""20 byte node of this revision.""")

    p1node = interfaceutil.Attribute(
        """20 byte node of 1st parent of this revision."""
    )

    p2node = interfaceutil.Attribute(
        """20 byte node of 2nd parent of this revision."""
    )

    linknode = interfaceutil.Attribute(
        """20 byte node of the changelog revision this node is linked to."""
    )

    flags = interfaceutil.Attribute(
        """2 bytes of integer flags that apply to this revision.

        This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
        """
    )

    basenode = interfaceutil.Attribute(
        """20 byte node of the revision this data is a delta against.

        ``nullid`` indicates that the revision is a full revision and not
        a delta.
        """
    )

    baserevisionsize = interfaceutil.Attribute(
        """Size of base revision this delta is against.

        May be ``None`` if ``basenode`` is ``nullid``.
        """
    )

    revision = interfaceutil.Attribute(
        """Raw fulltext of revision data for this node."""
    )

    delta = interfaceutil.Attribute(
        """Delta between ``basenode`` and ``node``.

        Stored in the bdiff delta format.
        """
    )

    sidedata = interfaceutil.Attribute(
        """Raw sidedata bytes for the given revision."""
    )

    protocol_flags = interfaceutil.Attribute(
        """Single byte of integer flags that can influence the protocol.

        This is a bitwise composition of the ``storageutil.CG_FLAG*`` constants.
        """
    )


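A hedged consumer-side sketch of the `revision`/`delta` mutual exclusion
described above (hypothetical `store`, `nodes`, and `basetexts`;
`mdiff.patch` is Mercurial's bdiff patch helper):

    for d in store.emitrevisions(nodes, revisiondata=True):
        if d.revision is not None:
            fulltext = d.revision
        else:
            # apply the bdiff delta to the base revision's fulltext
            fulltext = mdiff.patch(basetexts[d.basenode], d.delta)
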
class ifilerevisionssequence(interfaceutil.Interface):
    """Contains index data for all revisions of a file.

    Types implementing this behave like lists of tuples. The index
    in the list corresponds to the revision number. The values contain
    index metadata.

    The *null* revision (revision number -1) is always the last item
    in the index.
    """

    def __len__():
        """The total number of revisions."""

    def __getitem__(rev):
        """Returns the object having a specific revision number.

        Returns an 8-tuple with the following fields:

        offset+flags
           Contains the offset and flags for the revision. 64-bit unsigned
           integer where first 6 bytes are the offset and the next 2 bytes
           are flags. The offset can be 0 if it is not used by the store.
        compressed size
           Size of the revision data in the store. It can be 0 if it isn't
           needed by the store.
        uncompressed size
           Fulltext size. It can be 0 if it isn't needed by the store.
        base revision
           Revision number of revision the delta for storage is encoded
           against. -1 indicates not encoded against a base revision.
        link revision
           Revision number of changelog revision this entry is related to.
        p1 revision
           Revision number of 1st parent. -1 if no 1st parent.
        p2 revision
           Revision number of 2nd parent. -1 if no 2nd parent.
        node
           Binary node value for this revision number.

        Negative values should index off the end of the sequence. ``-1``
        should return the null revision. ``-2`` should return the most
        recent revision.
        """

    def __contains__(rev):
        """Whether a revision number exists."""

    def insert(self, i, entry):
        """Add an item to the index at specific revision."""


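A hedged illustration of the ``offset+flags`` packing described in the
8-tuple above (6 bytes of offset, 2 bytes of flags in one 64-bit integer):

    offset_flags = (offset << 16) | flags   # high 48 bits: offset, low 16: flags
    offset = offset_flags >> 16
    flags = offset_flags & 0xFFFF
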
573 class ifileindex(interfaceutil.Interface):
573 class ifileindex(interfaceutil.Interface):
574 """Storage interface for index data of a single file.
574 """Storage interface for index data of a single file.
575
575
576 File storage data is divided into index metadata and data storage.
576 File storage data is divided into index metadata and data storage.
577 This interface defines the index portion of the interface.
577 This interface defines the index portion of the interface.
578
578
579 The index logically consists of:
579 The index logically consists of:
580
580
581 * A mapping between revision numbers and nodes.
581 * A mapping between revision numbers and nodes.
582 * DAG data (storing and querying the relationship between nodes).
582 * DAG data (storing and querying the relationship between nodes).
583 * Metadata to facilitate storage.
583 * Metadata to facilitate storage.
584 """
584 """
585
585
586 nullid = interfaceutil.Attribute(
586 nullid = interfaceutil.Attribute(
587 """node for the null revision for use as delta base."""
587 """node for the null revision for use as delta base."""
588 )
588 )
589
589
590 def __len__():
590 def __len__():
591 """Obtain the number of revisions stored for this file."""
591 """Obtain the number of revisions stored for this file."""
592
592
593 def __iter__():
593 def __iter__():
594 """Iterate over revision numbers for this file."""
594 """Iterate over revision numbers for this file."""
595
595
596 def hasnode(node):
596 def hasnode(node):
597 """Returns a bool indicating if a node is known to this store.
597 """Returns a bool indicating if a node is known to this store.
598
598
599 Implementations must only return True for full, binary node values:
599 Implementations must only return True for full, binary node values:
600 hex nodes, revision numbers, and partial node matches must be
600 hex nodes, revision numbers, and partial node matches must be
601 rejected.
601 rejected.
602
602
603 The null node is never present.
603 The null node is never present.
604 """
604 """
605
605
606 def revs(start=0, stop=None):
606 def revs(start=0, stop=None):
607 """Iterate over revision numbers for this file, with control."""
607 """Iterate over revision numbers for this file, with control."""
608
608
609 def parents(node):
609 def parents(node):
610 """Returns a 2-tuple of parent nodes for a revision.
610 """Returns a 2-tuple of parent nodes for a revision.
611
611
612 Values will be ``nullid`` if the parent is empty.
612 Values will be ``nullid`` if the parent is empty.
613 """
613 """
614
614
615 def parentrevs(rev):
615 def parentrevs(rev):
616 """Like parents() but operates on revision numbers."""
616 """Like parents() but operates on revision numbers."""
617
617
618 def rev(node):
618 def rev(node):
619 """Obtain the revision number given a node.
619 """Obtain the revision number given a node.
620
620
621 Raises ``error.LookupError`` if the node is not known.
621 Raises ``error.LookupError`` if the node is not known.
622 """
622 """
623
623
624 def node(rev):
624 def node(rev):
625 """Obtain the node value given a revision number.
625 """Obtain the node value given a revision number.
626
626
627 Raises ``IndexError`` if the node is not known.
627 Raises ``IndexError`` if the node is not known.
628 """
628 """
629
629
630 def lookup(node):
630 def lookup(node):
631 """Attempt to resolve a value to a node.
631 """Attempt to resolve a value to a node.
632
632
633 Value can be a binary node, hex node, revision number, or a string
633 Value can be a binary node, hex node, revision number, or a string
634 that can be converted to an integer.
634 that can be converted to an integer.
635
635
636 Raises ``error.LookupError`` if a node could not be resolved.
636 Raises ``error.LookupError`` if a node could not be resolved.
637 """
637 """
638
638
639 def linkrev(rev):
639 def linkrev(rev):
640 """Obtain the changeset revision number a revision is linked to."""
640 """Obtain the changeset revision number a revision is linked to."""
641
641
642 def iscensored(rev):
642 def iscensored(rev):
643 """Return whether a revision's content has been censored."""
643 """Return whether a revision's content has been censored."""
644
644
645 def commonancestorsheads(node1, node2):
645 def commonancestorsheads(node1, node2):
646 """Obtain an iterable of nodes containing heads of common ancestors.
646 """Obtain an iterable of nodes containing heads of common ancestors.
647
647
648 See ``ancestor.commonancestorsheads()``.
648 See ``ancestor.commonancestorsheads()``.
649 """
649 """
650
650
651 def descendants(revs):
651 def descendants(revs):
652 """Obtain descendant revision numbers for a set of revision numbers.
652 """Obtain descendant revision numbers for a set of revision numbers.
653
653
654 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
654 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
655 """
655 """
656
656
657 def heads(start=None, stop=None):
657 def heads(start=None, stop=None):
658 """Obtain a list of nodes that are DAG heads, with control.
658 """Obtain a list of nodes that are DAG heads, with control.
659
659
660 The set of revisions examined can be limited by specifying
660 The set of revisions examined can be limited by specifying
661 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
661 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
662 iterable of nodes. DAG traversal starts at earlier revision
662 iterable of nodes. DAG traversal starts at earlier revision
663 ``start`` and iterates forward until any node in ``stop`` is
663 ``start`` and iterates forward until any node in ``stop`` is
664 encountered.
664 encountered.
665 """
665 """
666
666
667 def children(node):
667 def children(node):
668 """Obtain nodes that are children of a node.
668 """Obtain nodes that are children of a node.
669
669
670 Returns a list of nodes.
670 Returns a list of nodes.
671 """
671 """
672
672
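# A hedged usage sketch, not part of the interface definitions: how a
# consumer might walk an ``ifileindex``-conforming object. ``fl`` is assumed
# to be a filelog-like store obtained elsewhere (e.g. ``repo.file(b'path')``).
def _example_walk_index(fl):
    from mercurial.node import hex

    for rev in fl.revs():
        node = fl.node(rev)
        p1, p2 = fl.parents(node)
        # linkrev ties this file revision back to a changelog revision
        print(rev, hex(node), fl.linkrev(rev), hex(p1), hex(p2))
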

class ifiledata(interfaceutil.Interface):
    """Storage interface for data storage of a specific file.

    This complements ``ifileindex`` and provides an interface for accessing
    data for a tracked file.
    """

    def size(rev):
        """Obtain the fulltext size of file data.

        Any metadata is excluded from size measurements.
        """

    def revision(node):
        """Obtain fulltext data for a node.

        Any storage transformations are applied before the data is
        returned. Use ``rawdata()`` to obtain the stored data without
        non-raw storage transformations applied.

        The fulltext data may contain a header containing metadata. Most
        consumers should use ``read()`` to obtain the actual file data.
        """

    def rawdata(node):
        """Obtain raw data for a node."""

    def read(node):
        """Resolve file fulltext data.

        This is similar to ``revision()`` except any metadata in the data
        headers is stripped.
        """

    def renamed(node):
        """Obtain copy metadata for a node.

        Returns ``False`` if no copy metadata is stored or a 2-tuple of
        (path, node) from which this revision was copied.
        """

    def cmp(node, fulltext):
        """Compare fulltext to another revision.

        Returns True if the fulltext is different from what is stored.

        This takes copy metadata into account.

        TODO better document the copy metadata and censoring logic.
        """

    def emitrevisions(
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=CG_DELTAMODE_STD,
    ):
        """Produce ``irevisiondelta`` for revisions.

        Given an iterable of nodes, emits objects conforming to the
        ``irevisiondelta`` interface that describe revisions in storage.

        This method is a generator.

        The input nodes may be unordered. Implementations must ensure that a
        node's parents are emitted before the node itself. Transitively, this
        means that a node may only be emitted once all its ancestors in
        ``nodes`` have also been emitted.

        By default, emits "index" data (the ``node``, ``p1node``, and
        ``p2node`` attributes). If ``revisiondata`` is set, revision data
        will also be present on the emitted objects.

        With default argument values, implementations can choose to emit
        either fulltext revision data or a delta. When emitting deltas,
        implementations must consider whether the delta's base revision
        fulltext is available to the receiver.

        The base revision fulltext is guaranteed to be available if any of
        the following are met:

        * Its fulltext revision was emitted by this method call.
        * A delta for that revision was emitted by this method call.
        * ``assumehaveparentrevisions`` is True and the base revision is a
          parent of the node.

        ``nodesorder`` can be used to control the order that revisions are
        emitted. By default, revisions can be reordered as long as they are
        in DAG topological order (see above). If the value is ``nodes``,
        the iteration order from ``nodes`` should be used. If the value is
        ``storage``, then the native order from the backing storage layer
        is used. (Not all storage layers will have strong ordering and behavior
        of this mode is storage-dependent.) ``nodes`` ordering can force
        revisions to be emitted before their ancestors, so consumers should
        use it with care.

        The ``linknode`` attribute on the returned ``irevisiondelta`` may not
        be set and it is the caller's responsibility to resolve it, if needed.

        If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
        all revision data should be emitted as deltas against the revision
        emitted just prior. The initial revision should be a delta against its
        1st parent.
        """

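# A hedged sketch, not part of the interface definitions: the difference
# between ``revision()``/``rawdata()`` and ``read()`` for a conforming store
# ``fl``. Filelog fulltexts carrying copy metadata start with a header that
# ``read()`` strips and ``renamed()`` interprets.
def _example_file_data(fl, node):
    fulltext = fl.revision(node)  # may include the metadata header
    filedata = fl.read(node)      # header stripped: only the file content
    copied = fl.renamed(node)     # False, or (source path, source node)
    if copied:
        srcpath, srcnode = copied
        print(b'copied from %s' % srcpath)
    return fulltext, filedata


# A hedged sketch of consuming ``emitrevisions()``: parents are emitted
# before children, so a receiver can apply each delta on top of data it has
# already seen.
def _example_emit(fl, nodes):
    for rev in fl.emitrevisions(nodes, revisiondata=True):
        # ``irevisiondelta`` objects expose node/p1node/p2node and either a
        # fulltext (``rev.revision``) or a delta against ``rev.basenode``
        print(rev.node, rev.basenode)
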

class ifilemutation(interfaceutil.Interface):
    """Storage interface for mutation events of a tracked file."""

    def add(filedata, meta, transaction, linkrev, p1, p2):
        """Add a new revision to the store.

        Takes file data, dictionary of metadata, a transaction, linkrev,
        and parent nodes.

        Returns the node that was added.

        May no-op if a revision matching the supplied data is already stored.
        """

    def addrevision(
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        node=None,
        flags=0,
        cachedelta=None,
    ):
        """Add a new revision to the store and return its number.

        This is similar to ``add()`` except it operates at a lower level.

        The data passed in already contains a metadata header, if any.

        ``node`` and ``flags`` can be used to define the expected node and
        the flags to use with storage. ``flags`` is a bitwise value composed
        of the various ``REVISION_FLAG_*`` constants.

        ``add()`` is usually called when adding files from e.g. the working
        directory. ``addrevision()`` is often called by ``add()`` and for
        scenarios where revision data has already been computed, such as when
        applying raw data from a peer repo.
        """

    def addgroup(
        deltas,
        linkmapper,
        transaction,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        maybemissingparents=False,
    ):
        """Process a series of deltas for storage.

        ``deltas`` is an iterable of 7-tuples of
        (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
        to add.

        The ``delta`` field contains ``mpatch`` data to apply to a base
        revision, identified by ``deltabase``. The base node can be
        ``nullid``, in which case the header from the delta can be ignored
        and the delta used as the fulltext.

        ``alwayscache`` instructs the lower layers to cache the content of the
        newly added revision, even if it needs to be explicitly computed.
        This used to be the default when ``addrevisioncb`` was provided up to
        Mercurial 5.8.

        ``addrevisioncb`` should be called for each new rev as it is committed.
        ``duplicaterevisioncb`` should be called for all revs with a
        pre-existing node.

        ``maybemissingparents`` is a bool indicating whether the incoming
        data may reference parents/ancestor revisions that aren't present.
        This flag is set when receiving data into a "shallow" store that
        doesn't hold all history.

        Returns a list of nodes that were processed. A node will be in the list
        even if it existed in the store previously.
        """

    def censorrevision(tr, node, tombstone=b''):
        """Remove the content of a single revision.

        The specified ``node`` will have its content purged from storage.
        Future attempts to access the revision data for this node will
        result in failure.

        A ``tombstone`` message can optionally be stored. This message may be
        displayed to users when they attempt to access the missing revision
        data.

        Storage backends may have stored deltas against the previous content
        in this revision. As part of censoring a revision, these storage
        backends are expected to rewrite any internally stored deltas such
        that they no longer reference the deleted content.
        """

    def getstrippoint(minlink):
        """Find the minimum revision that must be stripped to strip a linkrev.

        Returns a 2-tuple containing the minimum revision number and a set
        of all revision numbers that would be broken by this strip.

        TODO this is highly revlog centric and should be abstracted into
        a higher-level deletion API. ``repair.strip()`` relies on this.
        """

    def strip(minlink, transaction):
        """Remove storage of items starting at a linkrev.

        This uses ``getstrippoint()`` to determine the first node to remove.
        Then it effectively truncates storage for all revisions after that.

        TODO this is highly revlog centric and should be abstracted into a
        higher-level deletion API.
        """

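# A hedged sketch, not part of the interface definitions: committing one new
# file revision through ``ifilemutation.add()``. ``fl`` is a conforming
# store, ``tr`` an open transaction and ``linkrev`` the changelog revision
# the new filelog revision will be tied to.
def _example_add_revision(fl, tr, linkrev):
    p1 = p2 = fl.nullid  # no parents: the first revision of this file
    # no copy metadata, so the metadata dict is empty
    return fl.add(b'contents of the file\n', {}, tr, linkrev, p1, p2)
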

class ifilestorage(ifileindex, ifiledata, ifilemutation):
    """Complete storage interface for a single tracked file."""

    def files():
        """Obtain paths that are backing storage for this file.

        TODO this is used heavily by verify code and there should probably
        be a better API for that.
        """

    def storageinfo(
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        """Obtain information about storage for this file's data.

        Returns a dict describing storage for this tracked path. The keys
        in the dict map to arguments of the same name. The arguments are bools
        indicating whether to calculate and obtain that data.

        exclusivefiles
           Iterable of (vfs, path) describing files that are exclusively
           used to back storage for this tracked path.

        sharedfiles
           Iterable of (vfs, path) describing files that are used to back
           storage for this tracked path. Those files may also provide storage
           for other stored entities.

        revisionscount
           Number of revisions available for retrieval.

        trackedsize
           Total size in bytes of all tracked revisions. This is a sum of the
           length of the fulltext of all revisions.

        storedsize
           Total size in bytes used to store data for all tracked revisions.
           This is commonly less than ``trackedsize`` due to internal usage
           of deltas rather than fulltext revisions.

        Not all storage backends may support all queries or have a reasonable
        value to use. In that case, the value should be set to ``None`` and
        callers are expected to handle this special value.
        """

    def verifyintegrity(state):
        """Verifies the integrity of file storage.

        ``state`` is a dict holding state of the verifier process. It can be
        used to communicate data between invocations of multiple storage
        primitives.

        If individual revisions cannot have their revision content resolved,
        the method is expected to set the ``skipread`` key to a set of nodes
        that encountered problems. If set, the method can also add the node(s)
        to ``safe_renamed`` to indicate nodes whose rename checks can still be
        performed with the currently accessible data.

        The method yields objects conforming to the ``iverifyproblem``
        interface.
        """

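# A hedged sketch of querying ``storageinfo()``. Only the requested keys are
# computed; the keys are assumed to mirror the byte-string argument names,
# and a backend may answer ``None`` for queries it cannot serve.
def _example_storage_report(fl):
    info = fl.storageinfo(
        revisionscount=True, trackedsize=True, storedsize=True
    )
    tracked = info[b'trackedsize']
    stored = info[b'storedsize']
    if tracked and stored is not None:
        # deltas usually keep the stored size well below the fulltext total
        print(info[b'revisionscount'], stored * 100 // tracked)
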

class idirs(interfaceutil.Interface):
    """Interface representing a collection of directories from paths.

    This interface is essentially a derived data structure representing
    directories from a collection of paths.
    """

    def addpath(path):
        """Add a path to the collection.

        All directories in the path will be added to the collection.
        """

    def delpath(path):
        """Remove a path from the collection.

        If the removal was the last path in a particular directory, the
        directory is removed from the collection.
        """

    def __iter__():
        """Iterate over the directories in this collection of paths."""

    def __contains__(path):
        """Whether a specific directory is in this collection."""

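# A hedged sketch, assuming ``pathutil.dirs`` as a concrete ``idirs``
# implementation: directories are derived from the paths fed in, and a
# directory vanishes once its last path is removed.
def _example_dirs():
    from mercurial.pathutil import dirs

    d = dirs([b'a/b/c.txt', b'a/d.txt'])
    assert b'a' in d and b'a/b' in d
    d.delpath(b'a/b/c.txt')  # last path under a/b
    assert b'a/b' not in d
    return sorted(d)
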

class imanifestdict(interfaceutil.Interface):
    """Interface representing a manifest data structure.

    A manifest is effectively a dict mapping paths to entries. Each entry
    consists of a binary node and extra flags affecting that entry.
    """

    def __getitem__(path):
        """Returns the binary node value for a path in the manifest.

        Raises ``KeyError`` if the path does not exist in the manifest.

        Equivalent to ``self.find(path)[0]``.
        """

    def find(path):
        """Returns the entry for a path in the manifest.

        Returns a 2-tuple of (node, flags).

        Raises ``KeyError`` if the path does not exist in the manifest.
        """

    def __len__():
        """Return the number of entries in the manifest."""

    def __nonzero__():
        """Returns True if the manifest has entries, False otherwise."""

    __bool__ = __nonzero__

    def __setitem__(path, node):
        """Define the node value for a path in the manifest.

        If the path is already in the manifest, its flags will be copied to
        the new entry.
        """

    def __contains__(path):
        """Whether a path exists in the manifest."""

    def __delitem__(path):
        """Remove a path from the manifest.

        Raises ``KeyError`` if the path is not in the manifest.
        """

    def __iter__():
        """Iterate over paths in the manifest."""

    def iterkeys():
        """Iterate over paths in the manifest."""

    def keys():
        """Obtain a list of paths in the manifest."""

    def filesnotin(other, match=None):
        """Obtain the set of paths in this manifest but not in another.

        ``match`` is an optional matcher function to be applied to both
        manifests.

        Returns a set of paths.
        """

    def dirs():
        """Returns an object implementing the ``idirs`` interface."""

    def hasdir(dir):
        """Returns a bool indicating if a directory is in this manifest."""

    def walk(match):
        """Generator of paths in manifest satisfying a matcher.

        If the matcher has explicit files listed and they don't exist in
        the manifest, ``match.bad()`` is called for each missing file.
        """

    def diff(other, match=None, clean=False):
        """Find differences between this manifest and another.

        This manifest is compared to ``other``.

        If ``match`` is provided, the two manifests are filtered against this
        matcher and only entries satisfying the matcher are compared.

        If ``clean`` is True, unchanged files are included in the returned
        object.

        Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
        the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
        represents the node and flags for this manifest and ``(node2, flag2)``
        are the same for the other manifest.
        """

    def setflag(path, flag):
        """Set the flag value for a given path.

        Raises ``KeyError`` if the path is not already in the manifest.
        """

    def get(path, default=None):
        """Obtain the node value for a path or a default value if missing."""

    def flags(path):
        """Return the flags value for a path (default: empty bytestring)."""

    def copy():
        """Return a copy of this manifest."""

    def items():
        """Returns an iterable of (path, node) for items in this manifest."""

    def iteritems():
        """Identical to items()."""

    def iterentries():
        """Returns an iterable of (path, node, flags) for this manifest.

        Similar to ``iteritems()`` except items are a 3-tuple and include
        flags.
        """

    def text():
        """Obtain the raw data representation for this manifest.

        Result is used to create a manifest revision.
        """

    def fastdelta(base, changes):
        """Obtain a delta between this manifest and another given changes.

        ``base`` is the raw data representation for another manifest.

        ``changes`` is an iterable of ``(path, to_delete)``.

        Returns a 2-tuple containing ``bytearray(self.text())`` and the
        delta between ``base`` and this manifest.

        If this manifest implementation can't support ``fastdelta()``,
        raise ``mercurial.manifest.FastdeltaUnavailable``.
        """

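# A hedged sketch of interpreting ``imanifestdict.diff()`` output. ``m1``
# and ``m2`` are assumed to be manifest dicts (e.g. ``ctx.manifest()``);
# a side is assumed to report ``None`` for a path it does not contain.
def _example_manifest_diff(m1, m2):
    for path, ((node1, flag1), (node2, flag2)) in m1.diff(m2).items():
        if node1 is None:
            print(path, b'only in the other manifest')
        elif node2 is None:
            print(path, b'only in this manifest')
        else:
            print(path, b'differs')
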

class imanifestrevisionbase(interfaceutil.Interface):
    """Base interface representing a single revision of a manifest.

    Should not be used as a primary interface: should always be inherited
    as part of a larger interface.
    """

    def copy():
        """Obtain a copy of this manifest instance.

        Returns an object conforming to the ``imanifestrevisionwritable``
        interface. The instance will be associated with the same
        ``imanifestlog`` collection as this instance.
        """

    def read():
        """Obtain the parsed manifest data structure.

        The returned object conforms to the ``imanifestdict`` interface.
        """


class imanifestrevisionstored(imanifestrevisionbase):
    """Interface representing a manifest revision committed to storage."""

    def node():
        """The binary node for this manifest."""

    parents = interfaceutil.Attribute(
        """List of binary nodes that are parents for this manifest revision."""
    )

    def readdelta(shallow=False):
        """Obtain the manifest data structure representing changes from parent.

        This manifest is compared to its 1st parent. A new manifest
        representing those differences is constructed.

        The returned object conforms to the ``imanifestdict`` interface.
        """

    def readfast(shallow=False):
        """Calls either ``read()`` or ``readdelta()``.

        The faster of the two options is called.
        """

    def find(key):
        """Calls ``self.read().find(key)``.

        Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
        """

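# A hedged sketch of reading a stored manifest revision. ``mctx`` is assumed
# to conform to ``imanifestrevisionstored`` (e.g. ``repo.manifestlog[node]``).
def _example_read_manifest(mctx):
    full = mctx.read()          # the complete path -> node mapping
    changed = mctx.readdelta()  # only entries differing from the 1st parent
    return len(full), len(changed)
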

class imanifestrevisionwritable(imanifestrevisionbase):
    """Interface representing a manifest revision that can be committed."""

    def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
        """Add this revision to storage.

        Takes a transaction object, the changeset revision number it will
        be associated with, its parent nodes, and lists of added and
        removed paths.

        If match is provided, storage can choose not to inspect or write out
        items that do not match. Storage is still required to be able to provide
        the full manifest in the future for any directories written (these
        manifests should not be "narrowed on disk").

        Returns the binary node of the created revision.
        """


class imanifeststorage(interfaceutil.Interface):
    """Storage interface for manifest data."""

    nodeconstants = interfaceutil.Attribute(
        """nodeconstants used by the current repository."""
    )

    tree = interfaceutil.Attribute(
        """The path to the directory this manifest tracks.

        The empty bytestring represents the root manifest.
        """
    )

    index = interfaceutil.Attribute(
        """An ``ifilerevisionssequence`` instance."""
    )

    opener = interfaceutil.Attribute(
        """VFS opener to use to access underlying files used for storage.

        TODO this is revlog specific and should not be exposed.
        """
    )

    _generaldelta = interfaceutil.Attribute(
        """Whether generaldelta storage is being used.

        TODO this is revlog specific and should not be exposed.
        """
    )

    fulltextcache = interfaceutil.Attribute(
        """Dict with cache of fulltexts.

        TODO this doesn't feel appropriate for the storage interface.
        """
    )

    def __len__():
        """Obtain the number of revisions stored for this manifest."""

    def __iter__():
        """Iterate over revision numbers for this manifest."""

    def rev(node):
        """Obtain the revision number given a binary node.

        Raises ``error.LookupError`` if the node is not known.
        """

    def node(rev):
        """Obtain the node value given a revision number.

        Raises ``error.LookupError`` if the revision is not known.
        """

    def lookup(value):
        """Attempt to resolve a value to a node.

        Value can be a binary node, hex node, revision number, or a bytes
        value that can be converted to an integer.

        Raises ``error.LookupError`` if a node could not be resolved.
        """

    def parents(node):
        """Returns a 2-tuple of parent nodes for a node.

        Values will be ``nullid`` if the parent is empty.
        """

    def parentrevs(rev):
        """Like parents() but operates on revision numbers."""

    def linkrev(rev):
        """Obtain the changeset revision number a revision is linked to."""

    def revision(node):
1286 """Obtain fulltext data for a node."""
1286 """Obtain fulltext data for a node."""
1287
1287
1288 def rawdata(node, _df=None):
1288 def rawdata(node, _df=None):
1289 """Obtain raw data for a node."""
1289 """Obtain raw data for a node."""
1290
1290
1291 def revdiff(rev1, rev2):
1291 def revdiff(rev1, rev2):
1292 """Obtain a delta between two revision numbers.
1292 """Obtain a delta between two revision numbers.
1293
1293
1294 The returned data is the result of ``bdiff.bdiff()`` on the raw
1294 The returned data is the result of ``bdiff.bdiff()`` on the raw
1295 revision data.
1295 revision data.
1296 """
1296 """
1297
1297
1298 def cmp(node, fulltext):
1298 def cmp(node, fulltext):
1299 """Compare fulltext to another revision.
1299 """Compare fulltext to another revision.
1300
1300
1301 Returns True if the fulltext is different from what is stored.
1301 Returns True if the fulltext is different from what is stored.
1302 """
1302 """
1303
1303
1304 def emitrevisions(
1304 def emitrevisions(
1305 nodes,
1305 nodes,
1306 nodesorder=None,
1306 nodesorder=None,
1307 revisiondata=False,
1307 revisiondata=False,
1308 assumehaveparentrevisions=False,
1308 assumehaveparentrevisions=False,
1309 ):
1309 ):
1310 """Produce ``irevisiondelta`` describing revisions.
1310 """Produce ``irevisiondelta`` describing revisions.
1311
1311
1312 See the documentation for ``ifiledata`` for more.
1312 See the documentation for ``ifiledata`` for more.
1313 """
1313 """
1314
1314
1315 def addgroup(
1315 def addgroup(
1316 deltas,
1316 deltas,
1317 linkmapper,
1317 linkmapper,
1318 transaction,
1318 transaction,
1319 addrevisioncb=None,
1319 addrevisioncb=None,
1320 duplicaterevisioncb=None,
1320 duplicaterevisioncb=None,
1321 ):
1321 ):
1322 """Process a series of deltas for storage.
1322 """Process a series of deltas for storage.
1323
1323
1324 See the documentation in ``ifilemutation`` for more.
1324 See the documentation in ``ifilemutation`` for more.
1325 """
1325 """
1326
1326
1327 def rawsize(rev):
1327 def rawsize(rev):
1328 """Obtain the size of tracked data.
1328 """Obtain the size of tracked data.
1329
1329
1330 Is equivalent to ``len(m.rawdata(node))``.
1330 Is equivalent to ``len(m.rawdata(node))``.
1331
1331
1332 TODO this method is only used by upgrade code and may be removed.
1332 TODO this method is only used by upgrade code and may be removed.
1333 """
1333 """
1334
1334
1335 def getstrippoint(minlink):
1335 def getstrippoint(minlink):
1336 """Find minimum revision that must be stripped to strip a linkrev.
1336 """Find minimum revision that must be stripped to strip a linkrev.
1337
1337
1338 See the documentation in ``ifilemutation`` for more.
1338 See the documentation in ``ifilemutation`` for more.
1339 """
1339 """
1340
1340
1341 def strip(minlink, transaction):
1341 def strip(minlink, transaction):
1342 """Remove storage of items starting at a linkrev.
1342 """Remove storage of items starting at a linkrev.
1343
1343
1344 See the documentation in ``ifilemutation`` for more.
1344 See the documentation in ``ifilemutation`` for more.
1345 """
1345 """
1346
1346
1347 def checksize():
1347 def checksize():
1348 """Obtain the expected sizes of backing files.
1348 """Obtain the expected sizes of backing files.
1349
1349
1350 TODO this is used by verify and it should not be part of the interface.
1350 TODO this is used by verify and it should not be part of the interface.
1351 """
1351 """
1352
1352
1353 def files():
1353 def files():
1354 """Obtain paths that are backing storage for this manifest.
1354 """Obtain paths that are backing storage for this manifest.
1355
1355
1356 TODO this is used by verify and there should probably be a better API
1356 TODO this is used by verify and there should probably be a better API
1357 for this functionality.
1357 for this functionality.
1358 """
1358 """
1359
1359
1360 def deltaparent(rev):
1360 def deltaparent(rev):
1361 """Obtain the revision that a revision is delta'd against.
1361 """Obtain the revision that a revision is delta'd against.
1362
1362
1363 TODO delta encoding is an implementation detail of storage and should
1363 TODO delta encoding is an implementation detail of storage and should
1364 not be exposed to the storage interface.
1364 not be exposed to the storage interface.
1365 """
1365 """
1366
1366
1367 def clone(tr, dest, **kwargs):
1367 def clone(tr, dest, **kwargs):
1368 """Clone this instance to another."""
1368 """Clone this instance to another."""
1369
1369
1370 def clearcaches(clear_persisted_data=False):
1370 def clearcaches(clear_persisted_data=False):
1371 """Clear any caches associated with this instance."""
1371 """Clear any caches associated with this instance."""
1372
1372
1373 def dirlog(d):
1373 def dirlog(d):
1374 """Obtain a manifest storage instance for a tree."""
1374 """Obtain a manifest storage instance for a tree."""
1375
1375
1376 def add(
1376 def add(
1377 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1377 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1378 ):
1378 ):
1379 """Add a revision to storage.
1379 """Add a revision to storage.
1380
1380
1381 ``m`` is an object conforming to ``imanifestdict``.
1381 ``m`` is an object conforming to ``imanifestdict``.
1382
1382
1383 ``link`` is the linkrev revision number.
1383 ``link`` is the linkrev revision number.
1384
1384
1385 ``p1`` and ``p2`` are the parent revision numbers.
1385 ``p1`` and ``p2`` are the parent revision numbers.
1386
1386
1387 ``added`` and ``removed`` are iterables of added and removed paths,
1387 ``added`` and ``removed`` are iterables of added and removed paths,
1388 respectively.
1388 respectively.
1389
1389
1390 ``readtree`` is a function that can be used to read the child tree(s)
1390 ``readtree`` is a function that can be used to read the child tree(s)
1391 when recursively writing the full tree structure when using
1391 when recursively writing the full tree structure when using
1392 treemanifets.
1392 treemanifets.
1393
1393
1394 ``match`` is a matcher that can be used to hint to storage that not all
1394 ``match`` is a matcher that can be used to hint to storage that not all
1395 paths must be inspected; this is an optimization and can be safely
1395 paths must be inspected; this is an optimization and can be safely
1396 ignored. Note that the storage must still be able to reproduce a full
1396 ignored. Note that the storage must still be able to reproduce a full
1397 manifest including files that did not match.
1397 manifest including files that did not match.
1398 """
1398 """
1399
1399
1400 def storageinfo(
1400 def storageinfo(
1401 exclusivefiles=False,
1401 exclusivefiles=False,
1402 sharedfiles=False,
1402 sharedfiles=False,
1403 revisionscount=False,
1403 revisionscount=False,
1404 trackedsize=False,
1404 trackedsize=False,
1405 storedsize=False,
1405 storedsize=False,
1406 ):
1406 ):
1407 """Obtain information about storage for this manifest's data.
1407 """Obtain information about storage for this manifest's data.
1408
1408
1409 See ``ifilestorage.storageinfo()`` for a description of this method.
1409 See ``ifilestorage.storageinfo()`` for a description of this method.
1410 This one behaves the same way, except for manifest data.
1410 This one behaves the same way, except for manifest data.
1411 """
1411 """
1412
1412
1413 def get_revlog():
1413 def get_revlog():
1414 """return an actual revlog instance if any
1414 """return an actual revlog instance if any
1415
1415
1416 This exist because a lot of code leverage the fact the underlying
1416 This exist because a lot of code leverage the fact the underlying
1417 storage is a revlog for optimization, so giving simple way to access
1417 storage is a revlog for optimization, so giving simple way to access
1418 the revlog instance helps such code.
1418 the revlog instance helps such code.
1419 """
1419 """
1420
1420
1421
1421
1422 class imanifestlog(interfaceutil.Interface):
1422 class imanifestlog(interfaceutil.Interface):
1423 """Interface representing a collection of manifest snapshots.
1423 """Interface representing a collection of manifest snapshots.
1424
1424
1425 Represents the root manifest in a repository.
1425 Represents the root manifest in a repository.
1426
1426
1427 Also serves as a means to access nested tree manifests and to cache
1427 Also serves as a means to access nested tree manifests and to cache
1428 tree manifests.
1428 tree manifests.
1429 """
1429 """
1430
1430
1431 nodeconstants = interfaceutil.Attribute(
1431 nodeconstants = interfaceutil.Attribute(
1432 """nodeconstants used by the current repository."""
1432 """nodeconstants used by the current repository."""
1433 )
1433 )
1434
1434
1435 def __getitem__(node):
1435 def __getitem__(node):
1436 """Obtain a manifest instance for a given binary node.
1436 """Obtain a manifest instance for a given binary node.
1437
1437
1438 Equivalent to calling ``self.get('', node)``.
1438 Equivalent to calling ``self.get('', node)``.
1439
1439
1440 The returned object conforms to the ``imanifestrevisionstored``
1440 The returned object conforms to the ``imanifestrevisionstored``
1441 interface.
1441 interface.
1442 """
1442 """
1443
1443
1444 def get(tree, node, verify=True):
1444 def get(tree, node, verify=True):
1445 """Retrieve the manifest instance for a given directory and binary node.
1445 """Retrieve the manifest instance for a given directory and binary node.
1446
1446
1447 ``node`` always refers to the node of the root manifest (which will be
1447 ``node`` always refers to the node of the root manifest (which will be
1448 the only manifest if flat manifests are being used).
1448 the only manifest if flat manifests are being used).
1449
1449
1450 If ``tree`` is the empty string, the root manifest is returned.
1450 If ``tree`` is the empty string, the root manifest is returned.
1451 Otherwise the manifest for the specified directory will be returned
1451 Otherwise the manifest for the specified directory will be returned
1452 (requires tree manifests).
1452 (requires tree manifests).
1453
1453
1454 If ``verify`` is True, ``LookupError`` is raised if the node is not
1454 If ``verify`` is True, ``LookupError`` is raised if the node is not
1455 known.
1455 known.
1456
1456
1457 The returned object conforms to the ``imanifestrevisionstored``
1457 The returned object conforms to the ``imanifestrevisionstored``
1458 interface.
1458 interface.
1459 """
1459 """
1460
1460
1461 def getstorage(tree):
1461 def getstorage(tree):
1462 """Retrieve an interface to storage for a particular tree.
1462 """Retrieve an interface to storage for a particular tree.
1463
1463
1464 If ``tree`` is the empty bytestring, storage for the root manifest will
1464 If ``tree`` is the empty bytestring, storage for the root manifest will
1465 be returned. Otherwise storage for a tree manifest is returned.
1465 be returned. Otherwise storage for a tree manifest is returned.
1466
1466
1467 TODO formalize interface for returned object.
1467 TODO formalize interface for returned object.
1468 """
1468 """
1469
1469
1470 def clearcaches():
1470 def clearcaches():
1471 """Clear caches associated with this collection."""
1471 """Clear caches associated with this collection."""
1472
1472
1473 def rev(node):
1473 def rev(node):
1474 """Obtain the revision number for a binary node.
1474 """Obtain the revision number for a binary node.
1475
1475
1476 Raises ``error.LookupError`` if the node is not known.
1476 Raises ``error.LookupError`` if the node is not known.
1477 """
1477 """
1478
1478
1479 def update_caches(transaction):
1479 def update_caches(transaction):
1480 """update whatever cache are relevant for the used storage."""
1480 """update whatever cache are relevant for the used storage."""
1481
1481
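# A hedged sketch of going from a manifestlog to manifest data. ``repo`` is
# assumed to be a local repository and ``node`` a binary root-manifest node.
def _example_manifestlog(repo, node):
    ml = repo.manifestlog
    mctx = ml[node]                 # same as ml.get(b'', node)
    rootstore = ml.getstorage(b'')  # imanifeststorage for the root manifest
    return mctx.read(), len(rootstore)
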

class ilocalrepositoryfilestorage(interfaceutil.Interface):
    """Local repository sub-interface providing access to tracked file storage.

    This interface defines how a repository accesses storage for a single
    tracked file path.
    """

    def file(f):
        """Obtain a filelog for a tracked path.

        The returned type conforms to the ``ifilestorage`` interface.
        """

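# A hedged sketch: obtaining per-file storage from a repository object. The
# path is hypothetical; the returned object conforms to ``ifilestorage``.
def _example_file_storage(repo):
    fl = repo.file(b'README')
    return len(fl)  # number of stored revisions for this path
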
1496
1496
1497 class ilocalrepositorymain(interfaceutil.Interface):
1497 class ilocalrepositorymain(interfaceutil.Interface):
1498 """Main interface for local repositories.
1498 """Main interface for local repositories.
1499
1499
1500 This currently captures the reality of things - not how things should be.
1500 This currently captures the reality of things - not how things should be.
1501 """
1501 """
1502
1502
1503 nodeconstants = interfaceutil.Attribute(
1503 nodeconstants = interfaceutil.Attribute(
1504 """Constant nodes matching the hash function used by the repository."""
1504 """Constant nodes matching the hash function used by the repository."""
1505 )
1505 )
1506 nullid = interfaceutil.Attribute(
1506 nullid = interfaceutil.Attribute(
1507 """null revision for the hash function used by the repository."""
1507 """null revision for the hash function used by the repository."""
1508 )
1508 )
1509
1509
1510 supported = interfaceutil.Attribute(
1510 supported = interfaceutil.Attribute(
1511 """Set of requirements that this repo is capable of opening."""
1511 """Set of requirements that this repo is capable of opening."""
1512 )
1512 )
1513
1513
1514 requirements = interfaceutil.Attribute(
1514 requirements = interfaceutil.Attribute(
1515 """Set of requirements this repo uses."""
1515 """Set of requirements this repo uses."""
1516 )
1516 )
1517
1517
    features = interfaceutil.Attribute(
        """Set of "features" this repository supports.

        A "feature" is a loosely-defined term. It can refer to a feature
        in the classical sense or can describe an implementation detail
        of the repository. For example, a ``readonly`` feature may denote
        the repository as read-only. Or a ``revlogfilestore`` feature may
        denote that the repository is using revlogs for file storage.

        The intent of features is to provide a machine-queryable mechanism
        for repo consumers to test for various repository characteristics.

        Features are similar to ``requirements``. The main difference is that
        requirements are stored on-disk and represent requirements to open the
        repository. Features are more run-time capabilities of the repository
        and more granular capabilities (which may be derived from requirements).
        """
    )
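
    # Hedged usage sketch (illustrative, not part of the interface): a
    # consumer probing capabilities rather than concrete types might write
    #
    #     if repository.REPO_FEATURE_REVLOG_FILE_STORAGE in repo.features:
    #         rl = repo.file(b'foo').get_revlog()
    #
    # where ``REPO_FEATURE_REVLOG_FILE_STORAGE`` is assumed to be one of the
    # feature constants defined elsewhere in this module.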

    filtername = interfaceutil.Attribute(
        """Name of the repoview that is active on this repo."""
    )

    vfs_map = interfaceutil.Attribute(
        """a bytes-key → vfs mapping used by transaction and others"""
    )

    wvfs = interfaceutil.Attribute(
        """VFS used to access the working directory."""
    )

    vfs = interfaceutil.Attribute(
        """VFS rooted at the .hg directory.

        Used to access repository data not in the store.
        """
    )

    svfs = interfaceutil.Attribute(
        """VFS rooted at the store.

        Used to access repository data in the store. Typically .hg/store.
        But can point elsewhere if the store is shared.
        """
    )

    root = interfaceutil.Attribute(
        """Path to the root of the working directory."""
    )

    path = interfaceutil.Attribute("""Path to the .hg directory.""")

    origroot = interfaceutil.Attribute(
        """The filesystem path that was used to construct the repo."""
    )

    auditor = interfaceutil.Attribute(
        """A pathauditor for the working directory.

        This checks if a path refers to a nested repository.

        Operates on the filesystem.
        """
    )

    nofsauditor = interfaceutil.Attribute(
        """A pathauditor for the working directory.

        This is like ``auditor`` except it doesn't do filesystem checks.
        """
    )

    baseui = interfaceutil.Attribute(
        """Original ui instance passed into constructor."""
    )

    ui = interfaceutil.Attribute("""Main ui instance for this instance.""")

    sharedpath = interfaceutil.Attribute(
        """Path to the .hg directory of the repo this repo was shared from."""
    )

    store = interfaceutil.Attribute("""A store instance.""")

    spath = interfaceutil.Attribute("""Path to the store.""")

    sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")

    cachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory.

        Typically .hg/cache.
        """
    )

    wcachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory dedicated to the working copy.

        Typically .hg/wcache.
        """
    )

    filteredrevcache = interfaceutil.Attribute(
        """Holds sets of revisions to be filtered."""
    )

    names = interfaceutil.Attribute("""A ``namespaces`` instance.""")

    filecopiesmode = interfaceutil.Attribute(
        """The way file copies should be dealt with in this repo."""
    )

    def close():
        """Close the handle on this repository."""

    def peer(path=None):
        """Obtain an object conforming to the ``peer`` interface."""

    def unfiltered():
        """Obtain an unfiltered/raw view of this repo."""

    def filtered(name, visibilityexceptions=None):
        """Obtain a named view of this repository."""

    obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")

    changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")

    manifestlog = interfaceutil.Attribute(
        """An instance conforming to the ``imanifestlog`` interface.

        Provides access to manifests for the repository.
        """
    )

    dirstate = interfaceutil.Attribute("""Working directory state.""")

    narrowpats = interfaceutil.Attribute(
        """Matcher patterns for this repository's narrowspec."""
    )

    def narrowmatch(match=None, includeexact=False):
        """Obtain a matcher for the narrowspec."""

    def setnarrowpats(newincludes, newexcludes):
        """Define the narrowspec for this repository."""

    def __getitem__(changeid):
        """Try to resolve a changectx."""

    def __contains__(changeid):
        """Whether a changeset exists."""

    def __nonzero__():
        """Always returns True."""
        return True

    __bool__ = __nonzero__

    def __len__():
        """Returns the number of changesets in the repo."""

    def __iter__():
        """Iterate over revisions in the changelog."""

    def revs(expr, *args):
        """Evaluate a revset.

        Emits revisions.
        """

    def set(expr, *args):
        """Evaluate a revset.

        Emits changectx instances.
        """

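    # Hedged usage sketch: both methods take a revset string followed by
    # positional arguments substituted via revset format specifiers, e.g.
    #
    #     for rev in repo.revs(b'ancestors(%d)', somerev):
    #         ...
    #     for ctx in repo.set(b'%ln', somenodes):
    #         ...
    #
    # (``%d`` expands a revision number, ``%ln`` a list of binary nodes.)
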
    def anyrevs(specs, user=False, localalias=None):
        """Find revisions matching one of the given revsets."""

    def url():
        """Returns a string representing the location of this repo."""

    def hook(name, throw=False, **args):
        """Call a hook."""

    def tags():
        """Return a mapping of tag to node."""

    def tagtype(tagname):
        """Return the type of a given tag."""

    def tagslist():
        """Return a list of tags ordered by revision."""

    def nodetags(node):
        """Return the tags associated with a node."""

    def nodebookmarks(node):
        """Return the list of bookmarks pointing to the specified node."""

    def branchmap():
        """Return a mapping of branch to heads in that branch."""

    def revbranchcache():
        pass

    def register_changeset(rev, changelogrevision):
        """Extension point for caches for new nodes.

        Multiple consumers are expected to need parts of the changelogrevision,
        so it is provided as an optimization to avoid duplicate lookups. A
        simple cache would be fragile when other revisions are accessed, too."""
        pass

    def branchtip(branchtip, ignoremissing=False):
        """Return the tip node for a given branch."""

    def lookup(key):
        """Resolve the node for a revision."""

    def lookupbranch(key):
        """Look up the branch name of the given revision or branch name."""

    def known(nodes):
        """Determine whether a series of nodes is known.

        Returns a list of bools.
        """

    def local():
        """Whether the repository is local."""
        return True

    def publishing():
        """Whether the repository is a publishing repository."""

    def cancopy():
        pass

    def shared():
        """The type of shared repository or None."""

    def wjoin(f, *insidef):
        """Calls self.vfs.reljoin(self.root, f, *insidef)"""

    def setparents(p1, p2):
        """Set the parent nodes of the working directory."""

    def filectx(path, changeid=None, fileid=None):
        """Obtain a filectx for the given file revision."""

    def getcwd():
        """Obtain the current working directory from the dirstate."""

    def pathto(f, cwd=None):
        """Obtain the relative path to a file."""

    def adddatafilter(name, fltr):
        pass

    def wread(filename):
        """Read a file from wvfs, using data filters."""

    def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
        """Write data to a file in the wvfs, using data filters."""

    def wwritedata(filename, data):
        """Resolve data for writing to the wvfs, using data filters."""

    def currenttransaction():
        """Obtain the current transaction instance or None."""

    def transaction(desc, report=None):
        """Open a new transaction to write to the repository."""

    def undofiles():
        """Returns a list of (vfs, path) for files to undo transactions."""

    def recover():
        """Roll back an interrupted transaction."""

    def rollback(dryrun=False, force=False):
        """Undo the last transaction.

        DANGEROUS.
        """

    def updatecaches(tr=None, full=False, caches=None):
        """Warm repo caches."""

    def invalidatecaches():
        """Invalidate cached data due to the repository mutating."""

    def invalidatevolatilesets():
        pass

    def invalidatedirstate():
        """Invalidate the dirstate."""

    def invalidate(clearfilecache=False):
        pass

    def invalidateall():
        pass

    def lock(wait=True):
        """Lock the repository store and return a lock instance."""

    def currentlock():
        """Return the lock if it's held or None."""

    def wlock(wait=True):
        """Lock the non-store parts of the repository."""

    def currentwlock():
        """Return the wlock if it's held or None."""

    def checkcommitpatterns(wctx, match, status, fail):
        pass

    def commit(
        text=b'',
        user=None,
        date=None,
        match=None,
        force=False,
        editor=False,
        extra=None,
    ):
        """Add a new revision to the repository."""
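
    # Hedged usage sketch: callers conventionally hold both repository locks
    # (``wlock()``/``lock()`` above) around a commit, roughly:
    #
    #     with repo.wlock(), repo.lock():
    #         node = repo.commit(text=b'message', user=b'alice <a@example.com>')
    #
    # The keyword names mirror this signature; the locking discipline is an
    # assumption about typical usage, not something this interface enforces.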

    def commitctx(ctx, error=False, origctx=None):
        """Commit a commitctx instance to the repository."""

    def destroying():
        """Inform the repository that nodes are about to be destroyed."""

    def destroyed():
        """Inform the repository that nodes have been destroyed."""

    def status(
        node1=b'.',
        node2=None,
        match=None,
        ignored=False,
        clean=False,
        unknown=False,
        listsubrepos=False,
    ):
        """Convenience method to call repo[x].status()."""

    def addpostdsstatus(ps):
        pass

    def postdsstatus():
        pass

    def clearpostdsstatus():
        pass

    def heads(start=None):
        """Obtain list of nodes that are DAG heads."""

    def branchheads(branch=None, start=None, closed=False):
        pass

    def branches(nodes):
        pass

    def between(pairs):
        pass

    def checkpush(pushop):
        pass

    prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")

    def pushkey(namespace, key, old, new):
        pass

    def listkeys(namespace):
        pass

    def debugwireargs(one, two, three=None, four=None, five=None):
        pass

    def savecommitmessage(text):
        pass

    def register_sidedata_computer(
        kind, category, keys, computer, flags, replace=False
    ):
        pass

    def register_wanted_sidedata(category):
        pass


class completelocalrepository(
    ilocalrepositorymain, ilocalrepositoryfilestorage
):
    """Complete interface for a local repository."""


class iwireprotocolcommandcacher(interfaceutil.Interface):
    """Represents a caching backend for wire protocol commands.

    Wire protocol version 2 supports transparent caching of many commands.
    To leverage this caching, servers can activate objects that cache
    command responses. Objects handle both cache writing and reading.
    This interface defines how that response caching mechanism works.

    Wire protocol version 2 commands emit a series of objects that are
    serialized and sent to the client. The caching layer exists between
    the invocation of the command function and the sending of its output
    objects to an output layer.

    Instances of this interface represent a binding to a cache that
    can serve a response (in place of calling a command function) and/or
    write responses to a cache for subsequent use.

    When a command request arrives, the following happens with regard
    to this interface:

    1. The server determines whether the command request is cacheable.
    2. If it is, an instance of this interface is spawned.
    3. The cacher is activated in a context manager (``__enter__`` is called).
    4. A cache *key* for that request is derived. This will call the
       instance's ``adjustcachekeystate()`` method so the derivation
       can be influenced.
    5. The cacher is informed of the derived cache key via a call to
       ``setcachekey()``.
    6. The cacher's ``lookup()`` method is called to test for presence of
       the derived key in the cache.
    7. If ``lookup()`` returns a hit, that cached result is used in place
       of invoking the command function. ``__exit__`` is called and the instance
       is discarded.
    8. The command function is invoked.
    9. ``onobject()`` is called for each object emitted by the command
       function.
    10. After the final object is seen, ``onfinished()`` is called.
    11. ``__exit__`` is called to signal the end of use of the instance.

    Cache *key* derivation can be influenced by the instance.

    Cache keys are initially derived by a deterministic representation of
    the command request. This includes the command name, arguments, protocol
    version, etc. This initial key derivation is performed by CBOR-encoding a
    data structure and feeding that output into a hasher.

    Instances of this interface can influence this initial key derivation
    via ``adjustcachekeystate()``.

    The instance is informed of the derived cache key via a call to
    ``setcachekey()``. The instance must store the key locally so it can
    be consulted on subsequent operations that may require it.

    When constructed, the instance has access to a callable that can be used
    for encoding response objects. This callable receives as its single
    argument an object emitted by a command function. It returns an iterable
    of bytes chunks representing the encoded object. Unless the cacher is
    caching native Python objects in memory or has a way of reconstructing
    the original Python objects, implementations typically call this function
    to produce bytes from the output objects and then store those bytes in
    the cache. When it comes time to re-emit those bytes, they are wrapped
    in a ``wireprototypes.encodedresponse`` instance to tell the output
    layer that they are pre-encoded.

    When receiving the objects emitted by the command function, instances
    can choose what to do with those objects. The simplest thing to do is
    re-emit the original objects. They will be forwarded to the output
    layer and will be processed as if the cacher did not exist.

    Implementations could also choose to not emit objects - instead locally
    buffering objects or their encoded representation. They could then emit
    a single "coalesced" object when ``onfinished()`` is called. In
    this way, the implementation would function as a filtering layer of
    sorts.

    When caching objects, typically the encoded form of the object will
    be stored. Keep in mind that if the original object is forwarded to
    the output layer, it will need to be encoded there as well. For large
    output, this redundant encoding could add overhead. Implementations
    could wrap the encoded object data in ``wireprototypes.encodedresponse``
    instances to avoid this overhead.
    """

    def __enter__():
        """Marks the instance as active.

        Should return self.
        """

    def __exit__(exctype, excvalue, exctb):
        """Called when cacher is no longer used.

        This can be used by implementations to perform cleanup actions (e.g.
        disconnecting network sockets, aborting a partially cached response).
        """

    def adjustcachekeystate(state):
        """Influences cache key derivation by adjusting state to derive key.

        A dict defining the state used to derive the cache key is passed.

        Implementations can modify this dict to record additional state that
        is wanted to influence key derivation.

        Implementations are *highly* encouraged to not modify or delete
        existing keys.
        """

    def setcachekey(key):
        """Record the derived cache key for this request.

        Instances may mutate the key for internal usage, as desired. e.g.
        instances may wish to prepend the repo name, introduce path
        components for filesystem or URL addressing, etc. Behavior is up to
        the cache.

        Returns a bool indicating if the request is cacheable by this
        instance.
        """

    def lookup():
        """Attempt to resolve an entry in the cache.

        The instance is instructed to look for the cache key that it was
        informed about via the call to ``setcachekey()``.

        If there's no cache hit or the cacher doesn't wish to use the cached
        entry, ``None`` should be returned.

        Else, a dict defining the cached result should be returned. The
        dict may have the following keys:

        objs
           An iterable of objects that should be sent to the client. That
           iterable of objects is expected to be what the command function
           would return if invoked or an equivalent representation thereof.
        """

    def onobject(obj):
        """Called when a new object is emitted from the command function.

        Receives as its argument the object that was emitted from the
        command function.

        This method returns an iterator of objects to forward to the output
        layer. The easiest implementation is a generator that just
        ``yield obj``.
        """

    def onfinished():
        """Called after all objects have been emitted from the command function.

        Implementations should return an iterator of objects to forward to
        the output layer.

        This method can be a generator.
        """
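

# Hedged illustration (not part of the upstream module): a minimal in-memory
# cacher following the lifecycle documented above. Every name below is an
# assumption for demonstration; a production cacher would also need eviction,
# locking, and ``wireprototypes.encodedresponse`` wrapping when replaying
# pre-encoded bytes. The bytes spelling of the ``objs`` key follows the
# ``lookup()`` docstring and is itself an assumption.
class _examplememorycacher:
    _store = {}  # shared {cache key: [encoded chunk, ...]} mapping

    def __init__(self, encodefn):
        self._encodefn = encodefn  # encoder supplied at construction time
        self._key = None
        self._buffered = []

    def __enter__(self):
        return self  # step 3: activation

    def __exit__(self, exctype, excvalue, exctb):
        self._buffered = []  # steps 7/11: drop any partially cached response

    def adjustcachekeystate(self, state):
        pass  # step 4: contribute no extra key-derivation state

    def setcachekey(self, key):
        self._key = key  # step 5: remember the derived key
        return True  # this toy backend can cache every request

    def lookup(self):
        chunks = self._store.get(self._key)  # step 6
        if chunks is None:
            return None  # miss: the command function will run (step 8)
        # step 7: replay the cached, already-encoded chunks
        return {b'objs': list(chunks)}

    def onobject(self, obj):
        # step 9: remember the encoded form while forwarding the original
        self._buffered.extend(self._encodefn(obj))
        yield obj

    def onfinished(self):
        self._store[self._key] = list(self._buffered)  # step 10: publish
        return []
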
@@ -1,2382 +1,2382 b''
# manifest.py - manifest revision class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import heapq
import itertools
import struct
import weakref

from .i18n import _
from .node import (
    bin,
    hex,
    nullrev,
)
from . import (
    encoding,
    error,
    match as matchmod,
    mdiff,
    pathutil,
    policy,
    pycompat,
    revlog,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    constants as revlog_constants,
)

parsers = policy.importmod('parsers')
propertycache = util.propertycache

# Allow tests to more easily test the alternate path in manifestdict.fastdelta()
FASTDELTA_TEXTDIFF_THRESHOLD = 1000


def _parse(nodelen, data):
    # This function does a little bit of excessive-looking
    # precondition checking. This is so that its behavior
    # exactly matches the C counterpart, to try and help
    # prevent surprise breakage for anyone that develops against
    # the pure version.
    if data and data[-1:] != b'\n':
        raise ValueError(b'Manifest did not end in a newline.')
    prev = None
    for l in data.splitlines():
        if prev is not None and prev > l:
            raise ValueError(b'Manifest lines not in sorted order.')
        prev = l
        f, n = l.split(b'\0')
        nl = len(n)
        flags = n[-1:]
        if flags in _manifestflags:
            n = n[:-1]
            nl -= 1
        else:
            flags = b''
        if nl != 2 * nodelen:
            raise ValueError(b'Invalid manifest line')

        yield f, bin(n), flags


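# Hedged illustration of the format ``_parse`` consumes: each line is
# "<path>\0<hex node><optional flag>\n", sorted by path. For 20-byte nodes:
#
#     data = b"a.txt\x00" + b"11" * 20 + b"\n" + b"b.sh\x00" + b"22" * 20 + b"x\n"
#     list(_parse(20, data))
#     # -> [(b'a.txt', b'\x11' * 20, b''), (b'b.sh', b'\x22' * 20, b'x')]
#
# The trailing ``x`` flags ``b.sh`` as executable; the valid flag values are
# listed in ``_manifestflags`` below.

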
def _text(it):
    files = []
    lines = []
    for f, n, fl in it:
        files.append(f)
        # if this is changed to support newlines in filenames,
        # be sure to check the templates/ dir again (especially *-raw.tmpl)
        lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))

    _checkforbidden(files)
    return b''.join(lines)


class lazymanifestiter:
    def __init__(self, lm):
        self.pos = 0
        self.lm = lm

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            self.pos += 1
            return data[0]
        self.pos += 1
        zeropos = data.find(b'\x00', pos)
        return data[pos:zeropos]

    __next__ = next


class lazymanifestiterentries:
    def __init__(self, lm):
        self.lm = lm
        self.pos = 0

    def __iter__(self):
        return self

    def next(self):
        try:
            data, pos = self.lm._get(self.pos)
        except IndexError:
            raise StopIteration
        if pos == -1:
            self.pos += 1
            return data
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', pos)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen = nlpos - zeropos - 2
        else:
            hlen = nlpos - zeropos - 1
            flags = b''
        if hlen != 2 * self.lm._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(
            data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
        )
        self.pos += 1
        return (data[pos:zeropos], hashval, flags)

    __next__ = next


def unhexlify(data, extra, pos, length):
    s = bin(data[pos : pos + length])
    if extra:
        s += pycompat.bytechr(extra & 0xFF)
    return s


def _cmp(a, b):
    return (a > b) - (a < b)


_manifestflags = {b'', b'l', b't', b'x'}


class _lazymanifest:
    """A pure python manifest backed by a byte string. It is supplemented with
    internal lists as it is modified, until it is compacted back to a pure byte
    string.

    ``data`` is the initial manifest data.

    ``positions`` is a list of offsets, one per manifest entry. Positive
    values are offsets into ``data``, negative values are offsets into the
    ``extradata`` list. When an entry is removed, its entry is dropped from
    ``positions``. The values are encoded such that when walking the list and
    indexing into ``data`` or ``extradata`` as appropriate, the entries are
    sorted by filename.

    ``extradata`` is a list of (key, hash, flags) for entries that were added or
    modified since the manifest was created or compacted.
    """
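
    # Hedged illustration of the encoding described above: with
    # positions = [0, -1, 17], entry 0 starts at data[0], entry 1 lives in
    # extradata[0] (negative values decode as extradata[-pos - 1]), and
    # entry 2 starts at data[17]. See _get() below.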

    def __init__(
        self,
        nodelen,
        data,
        positions=None,
        extrainfo=None,
        extradata=None,
        hasremovals=False,
    ):
        self._nodelen = nodelen
        if positions is None:
            self.positions = self.findlines(data)
            self.extrainfo = [0] * len(self.positions)
            self.data = data
            self.extradata = []
            self.hasremovals = False
        else:
            self.positions = positions[:]
            self.extrainfo = extrainfo[:]
            self.extradata = extradata[:]
            self.data = data
            self.hasremovals = hasremovals

    def findlines(self, data):
        if not data:
            return []
        pos = data.find(b"\n")
        if pos == -1 or data[-1:] != b'\n':
            raise ValueError(b"Manifest did not end in a newline.")
        positions = [0]
        prev = data[: data.find(b'\x00')]
        while pos < len(data) - 1 and pos != -1:
            positions.append(pos + 1)
            nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
            if nexts < prev:
                raise ValueError(b"Manifest lines not in sorted order.")
            prev = nexts
            pos = data.find(b"\n", pos + 1)
        return positions

    def _get(self, index):
        # get the position encoded in pos:
        # positive number is an index in 'data'
        # negative number is in 'extradata'
        pos = self.positions[index]
        if pos >= 0:
            return self.data, pos
        return self.extradata[-pos - 1], -1

    def _getkey(self, pos):
        if pos >= 0:
            return self.data[pos : self.data.find(b'\x00', pos + 1)]
        return self.extradata[-pos - 1][0]

    def bsearch(self, key):
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return midpoint
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return -1

    def bsearch2(self, key):
        # same as the above, but will always return the position
        # done for performance reasons
        first = 0
        last = len(self.positions) - 1

        while first <= last:
            midpoint = (first + last) // 2
            nextpos = self.positions[midpoint]
            candidate = self._getkey(nextpos)
            r = _cmp(key, candidate)
            if r == 0:
                return (midpoint, True)
            else:
                if r < 0:
                    last = midpoint - 1
                else:
                    first = midpoint + 1
        return (first, False)
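
    # Hedged illustration: for entries [b'a', b'c'], bsearch2(b'b') returns
    # (1, False) -- the insertion index for the missing key -- while
    # bsearch2(b'c') returns (1, True). __setitem__ below relies on this to
    # splice new entries into place.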

    def __contains__(self, key):
        return self.bsearch(key) != -1

    def __getitem__(self, key):
        if not isinstance(key, bytes):
            raise TypeError(b"getitem: manifest keys must be bytes.")
        needle = self.bsearch(key)
        if needle == -1:
            raise KeyError
        data, pos = self._get(needle)
        if pos == -1:
            return (data[1], data[2])
        zeropos = data.find(b'\x00', pos)
        nlpos = data.find(b'\n', zeropos)
        assert 0 <= needle <= len(self.positions)
        assert len(self.extrainfo) == len(self.positions)
        if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
            raise error.StorageError(b'Invalid manifest line')
        hlen = nlpos - zeropos - 1
        flags = data[nlpos - 1 : nlpos]
        if flags in _manifestflags:
            hlen -= 1
        else:
            flags = b''
        if hlen != 2 * self._nodelen:
            raise error.StorageError(b'Invalid manifest line')
        hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
        return (hashval, flags)

    def __delitem__(self, key):
        needle, found = self.bsearch2(key)
        if not found:
            raise KeyError
        cur = self.positions[needle]
        self.positions = self.positions[:needle] + self.positions[needle + 1 :]
        self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
        if cur >= 0:
            # This does NOT unsort the list as far as the search functions are
            # concerned, as they only examine lines mapped by self.positions.
            self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
            self.hasremovals = True

    def __setitem__(self, key, value):
        if not isinstance(key, bytes):
            raise TypeError(b"setitem: manifest keys must be a byte string.")
        if not isinstance(value, tuple) or len(value) != 2:
            raise TypeError(
                b"Manifest values must be a tuple of (node, flags)."
            )
        hashval = value[0]
        if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
            raise TypeError(b"node must be a 20-byte or 32-byte byte string")
        flags = value[1]
        if not isinstance(flags, bytes) or len(flags) > 1:
            raise TypeError(
                b"flags must be a 0 or 1 byte string, got %r" % (flags,)
            )
        needle, found = self.bsearch2(key)
        if found:
            # put the item
            pos = self.positions[needle]
            if pos < 0:
                self.extradata[-pos - 1] = (key, hashval, value[1])
            else:
                # just don't bother
                self.extradata.append((key, hashval, value[1]))
                self.positions[needle] = -len(self.extradata)
        else:
            # not found, put it in with extra positions
            self.extradata.append((key, hashval, value[1]))
            self.positions = (
                self.positions[:needle]
                + [-len(self.extradata)]
                + self.positions[needle:]
            )
            self.extrainfo = (
                self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
            )

    def copy(self):
        # XXX call _compact like in C?
        return _lazymanifest(
            self._nodelen,
            self.data,
            self.positions,
            self.extrainfo,
            self.extradata,
            self.hasremovals,
        )

    def _compact(self):
        # hopefully not called TOO often
        if len(self.extradata) == 0 and not self.hasremovals:
            return
        l = []
        i = 0
        offset = 0
        self.extrainfo = [0] * len(self.positions)
        while i < len(self.positions):
            if self.positions[i] >= 0:
                cur = self.positions[i]
                last_cut = cur

                # Collect all contiguous entries in the buffer at the current
                # offset, breaking out only for added/modified items held in
                # extradata, or a deleted line prior to the next position.
                while True:
                    self.positions[i] = offset
                    i += 1
                    if i == len(self.positions) or self.positions[i] < 0:
                        break

                    # A removed file has no positions[] entry, but does have an
                    # overwritten first byte. Break out and find the end of the
                    # current good entry/entries if there is a removed file
                    # before the next position.
                    if (
                        self.hasremovals
                        and self.data.find(b'\n\x00', cur, self.positions[i])
                        != -1
                    ):
                        break

                    offset += self.positions[i] - cur
                    cur = self.positions[i]
                end_cut = self.data.find(b'\n', cur)
                if end_cut != -1:
                    end_cut += 1
                offset += end_cut - cur
                l.append(self.data[last_cut:end_cut])
            else:
                while i < len(self.positions) and self.positions[i] < 0:
                    cur = self.positions[i]
                    t = self.extradata[-cur - 1]
                    l.append(self._pack(t))
                    self.positions[i] = offset
                    # Hashes are either 20 bytes (old sha1s) or 32
                    # bytes (new non-sha1).
                    hlen = 20
                    if len(t[1]) > 25:
                        hlen = 32
                    if len(t[1]) > hlen:
                        self.extrainfo[i] = ord(t[1][hlen + 1])
                    offset += len(l[-1])
                    i += 1
        self.data = b''.join(l)
        self.hasremovals = False
        self.extradata = []

    def _pack(self, d):
        n = d[1]
        assert len(n) in (20, 32)
        return d[0] + b'\x00' + hex(n) + d[2] + b'\n'

    def text(self):
        self._compact()
        return self.data

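    # A minimal sketch of the serialized form produced by text() above
    # (hypothetical entries; assumes the pure-Python class in this module
    # and mercurial.node.hex as `hex`):
    #
    #   lm = _lazymanifest(20, b'')
    #   lm[b'bar/baz'] = (b'\x22' * 20, b'x')
    #   lm[b'foo'] = (b'\x11' * 20, b'')
    #   assert lm.text() == (
    #       b'bar/baz\x00' + b'22' * 20 + b'x\n'
    #       + b'foo\x00' + b'11' * 20 + b'\n'
    #   )
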
    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.'''
        # XXX think whether efficiency matters here
        diff = {}

        for fn, e1, flags in self.iterentries():
            if fn not in m2:
                diff[fn] = (e1, flags), (None, b'')
            else:
                e2 = m2[fn]
                if (e1, flags) != e2:
                    diff[fn] = (e1, flags), e2
                elif clean:
                    diff[fn] = None

        for fn, e2, flags in m2.iterentries():
            if fn not in self:
                diff[fn] = (None, b''), (e2, flags)

        return diff

    def iterentries(self):
        return lazymanifestiterentries(self)

    def iterkeys(self):
        return lazymanifestiter(self)

    def __iter__(self):
        return lazymanifestiter(self)

    def __len__(self):
        return len(self.positions)

    def filtercopy(self, filterfn):
        # XXX should be optimized
        c = _lazymanifest(self._nodelen, b'')
        for f, n, fl in self.iterentries():
            if filterfn(f):
                c[f] = n, fl
        return c


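# Use the parsers module's lazymanifest when it provides one (the C
# extension does); otherwise keep the pure-Python class above.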
try:
    _lazymanifest = parsers.lazymanifest
except AttributeError:
    pass


@interfaceutil.implementer(repository.imanifestdict)
class manifestdict:
    def __init__(self, nodelen, data=b''):
        self._nodelen = nodelen
        self._lm = _lazymanifest(nodelen, data)

    def __getitem__(self, key):
        return self._lm[key][0]

    def find(self, key):
        return self._lm[key]

    def __len__(self):
        return len(self._lm)

    def __nonzero__(self):
        # nonzero is covered by the __len__ function, but implementing it here
        # makes it easier for extensions to override.
        return len(self._lm) != 0

    __bool__ = __nonzero__

    def __setitem__(self, key, node):
        self._lm[key] = node, self.flags(key)

    def __contains__(self, key):
        if key is None:
            return False
        return key in self._lm

    def __delitem__(self, key):
        del self._lm[key]

    def __iter__(self):
        return self._lm.__iter__()

    def iterkeys(self):
        return self._lm.iterkeys()

    def keys(self):
        return list(self.iterkeys())

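    # A minimal sketch of this dict-like surface (hypothetical 20-byte
    # nodes; setflag() is defined further down in this class):
    #
    #   md = manifestdict(20)
    #   md[b'a.txt'] = b'\x01' * 20       # store a file node
    #   md.setflag(b'a.txt', b'x')        # mark it executable
    #   assert md[b'a.txt'] == b'\x01' * 20
    #   assert md.flags(b'a.txt') == b'x'
    #   assert b'a.txt' in md and list(md) == [b'a.txt']
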
    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match is not None:
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(m2.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in m2}

    @propertycache
    def _dirs(self):
        return pathutil.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        """Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files."""
        files = match.files()
        return len(files) < 100 and (
            match.isexact()
            or (match.prefix() and all(fn in self for fn in files))
        )

    def walk(self, match):
        """Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                if fn in self:
                    yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            m = manifestdict(self._nodelen)
            lm = self._lm
            for fn in match.files():
                if fn in lm:
                    m._lm[fn] = lm[fn]
            return m

        m = manifestdict(self._nodelen)
        m._lm = self._lm.filtercopy(match)
        return m

    def diff(self, m2, match=None, clean=False):
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match:
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        return self._lm.diff(m2._lm, clean)

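    # A minimal sketch of the return shape (hypothetical nodes):
    #
    #   n1, n2 = b'\x01' * 20, b'\x02' * 20
    #   m1 = manifestdict(20)
    #   m1[b'f'] = n1
    #   m2 = manifestdict(20)
    #   m2[b'f'] = n2
    #   assert m1.diff(m2) == {b'f': ((n1, b''), (n2, b''))}
    #   # a file present only in m1 would map to ((n1, fl1), (None, b''))
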
    def setflag(self, key, flag):
        if flag not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._lm[key] = self[key], flag

    def get(self, key, default=None):
        try:
            return self._lm[key][0]
        except KeyError:
            return default

628
628
629 def flags(self, key):
629 def flags(self, key):
630 try:
630 try:
631 return self._lm[key][1]
631 return self._lm[key][1]
632 except KeyError:
632 except KeyError:
633 return b''
633 return b''
634
634
635 def copy(self):
635 def copy(self):
636 c = manifestdict(self._nodelen)
636 c = manifestdict(self._nodelen)
637 c._lm = self._lm.copy()
637 c._lm = self._lm.copy()
638 return c
638 return c
639
639
640 def items(self):
640 def items(self):
641 return (x[:2] for x in self._lm.iterentries())
641 return (x[:2] for x in self._lm.iterentries())
642
642
643 def iteritems(self):
643 def iteritems(self):
644 return (x[:2] for x in self._lm.iterentries())
644 return (x[:2] for x in self._lm.iterentries())
645
645
646 def iterentries(self):
646 def iterentries(self):
647 return self._lm.iterentries()
647 return self._lm.iterentries()
648
648
649 def text(self):
649 def text(self):
650 # most likely uses native version
650 # most likely uses native version
651 return self._lm.text()
651 return self._lm.text()
652
652
653 def fastdelta(self, base, changes):
653 def fastdelta(self, base, changes):
654 """Given a base manifest text as a bytearray and a list of changes
654 """Given a base manifest text as a bytearray and a list of changes
655 relative to that text, compute a delta that can be used by revlog.
655 relative to that text, compute a delta that can be used by revlog.
656 """
656 """
657 delta = []
657 delta = []
658 dstart = None
658 dstart = None
659 dend = None
659 dend = None
660 dline = [b""]
660 dline = [b""]
661 start = 0
661 start = 0
662 # zero copy representation of base as a buffer
662 # zero copy representation of base as a buffer
663 addbuf = util.buffer(base)
663 addbuf = util.buffer(base)
664
664
665 changes = list(changes)
665 changes = list(changes)
666 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
666 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
667 # start with a readonly loop that finds the offset of
667 # start with a readonly loop that finds the offset of
668 # each line and creates the deltas
668 # each line and creates the deltas
669 for f, todelete in changes:
669 for f, todelete in changes:
670 # bs will either be the index of the item or the insert point
670 # bs will either be the index of the item or the insert point
671 start, end = _msearch(addbuf, f, start)
671 start, end = _msearch(addbuf, f, start)
672 if not todelete:
672 if not todelete:
673 h, fl = self._lm[f]
673 h, fl = self._lm[f]
674 l = b"%s\0%s%s\n" % (f, hex(h), fl)
674 l = b"%s\0%s%s\n" % (f, hex(h), fl)
675 else:
675 else:
676 if start == end:
676 if start == end:
677 # item we want to delete was not found, error out
677 # item we want to delete was not found, error out
678 raise AssertionError(
678 raise AssertionError(
679 _(b"failed to remove %s from manifest") % f
679 _(b"failed to remove %s from manifest") % f
680 )
680 )
681 l = b""
681 l = b""
682 if dstart is not None and dstart <= start and dend >= start:
682 if dstart is not None and dstart <= start and dend >= start:
683 if dend < end:
683 if dend < end:
684 dend = end
684 dend = end
685 if l:
685 if l:
686 dline.append(l)
686 dline.append(l)
687 else:
687 else:
688 if dstart is not None:
688 if dstart is not None:
689 delta.append([dstart, dend, b"".join(dline)])
689 delta.append([dstart, dend, b"".join(dline)])
690 dstart = start
690 dstart = start
691 dend = end
691 dend = end
692 dline = [l]
692 dline = [l]
693
693
694 if dstart is not None:
694 if dstart is not None:
695 delta.append([dstart, dend, b"".join(dline)])
695 delta.append([dstart, dend, b"".join(dline)])
696 # apply the delta to the base, and get a delta for addrevision
696 # apply the delta to the base, and get a delta for addrevision
697 deltatext, arraytext = _addlistdelta(base, delta)
697 deltatext, arraytext = _addlistdelta(base, delta)
698 else:
698 else:
699 # For large changes, it's much cheaper to just build the text and
699 # For large changes, it's much cheaper to just build the text and
700 # diff it.
700 # diff it.
701 arraytext = bytearray(self.text())
701 arraytext = bytearray(self.text())
702 deltatext = mdiff.textdiff(
702 deltatext = mdiff.textdiff(
703 util.buffer(base), util.buffer(arraytext)
703 util.buffer(base), util.buffer(arraytext)
704 )
704 )
705
705
706 return arraytext, deltatext
706 return arraytext, deltatext
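
    # A minimal usage sketch (hypothetical nodes and filenames; `changes`
    # must be sorted by filename, each entry a (filename, todelete) pair):
    #
    #   m_old = manifestdict(20)
    #   m_old[b'old.txt'] = b'\x01' * 20
    #   m_new = m_old.copy()
    #   m_new[b'new.txt'] = b'\x02' * 20
    #   del m_new[b'old.txt']
    #   arraytext, deltatext = m_new.fastdelta(
    #       bytearray(m_old.text()), [(b'new.txt', False), (b'old.txt', True)]
    #   )
    #   # arraytext is the new full text; deltatext is a binary patch of
    #   # (start, end, data) regions suitable for revlog.addrevision.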


def _msearch(m, s, lo=0, hi=None):
    """return a tuple (start, end) that says where to find s within m.

    If the string is found, m[start:end] is the line containing
    that string. If start == end the string was not found and
    they indicate the proper sorted insertion point.

    m should be a buffer, a memoryview or a byte string.
    s is a byte string"""

    def advance(i, c):
        while i < lenm and m[i : i + 1] != c:
            i += 1
        return i

    if not s:
        return (lo, lo)
    lenm = len(m)
    if not hi:
        hi = lenm
    while lo < hi:
        mid = (lo + hi) // 2
        start = mid
        while start > 0 and m[start - 1 : start] != b'\n':
            start -= 1
        end = advance(start, b'\0')
        if bytes(m[start:end]) < s:
            # we know that after the null there are 40 bytes of sha1
            # this translates to the bisect lo = mid + 1
            lo = advance(end + 40, b'\n') + 1
        else:
            # this translates to the bisect hi = mid
            hi = start
    end = advance(lo, b'\0')
    found = m[lo:end]
    if s == found:
        # we know that after the null there are 40 bytes of sha1
        end = advance(end + 40, b'\n')
        return (lo, end + 1)
    else:
        return (lo, lo)

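# A minimal sketch of _msearch on a two-entry manifest text (hypothetical
# sha1-width hex nodes):
#
#   text = (
#       b'a.txt\x00' + b'11' * 20 + b'\n'
#       + b'b.txt\x00' + b'22' * 20 + b'x\n'
#   )
#   start, end = _msearch(text, b'b.txt')
#   assert text[start:end] == b'b.txt\x00' + b'22' * 20 + b'x\n'
#   lo, hi = _msearch(text, b'c.txt')  # missing: lo == hi == insertion point
#   assert lo == hi == len(text)
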

def _checkforbidden(l):
    """Check filenames for illegal characters."""
    for f in l:
        if b'\n' in f or b'\r' in f:
            raise error.StorageError(
                _(b"'\\n' and '\\r' disallowed in filenames: %r")
                % pycompat.bytestr(f)
            )


# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def _addlistdelta(addlist, x):
    # for large addlist arrays, building a new array is cheaper
    # than repeatedly modifying the existing one
    currentposition = 0
    newaddlist = bytearray()

    for start, end, content in x:
        newaddlist += addlist[currentposition:start]
        if content:
            newaddlist += bytearray(content)

        currentposition = end

    newaddlist += addlist[currentposition:]

    deltatext = b"".join(
        struct.pack(b">lll", start, end, len(content)) + content
        for start, end, content in x
    )
    return deltatext, newaddlist

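# A minimal sketch of the delta encoding above: each region is a packed
# big-endian (start, end, length) header followed by the replacement data
# (hypothetical values; struct is imported at the top of this module):
#
#   deltatext, newaddlist = _addlistdelta(
#       bytearray(b'01234rest'), [[0, 5, b'hello']]
#   )
#   assert deltatext == struct.pack(b'>lll', 0, 5, 5) + b'hello'
#   assert bytes(newaddlist) == b'hellorest'
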

def _splittopdir(f):
    if b'/' in f:
        dir, subpath = f.split(b'/', 1)
        return dir + b'/', subpath
    else:
        return b'', f

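# e.g. _splittopdir(b'a/b/c') == (b'a/', b'b/c')
#      _splittopdir(b'f') == (b'', b'f')
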

_noop = lambda s: None


@interfaceutil.implementer(repository.imanifestdict)
class treemanifest:
    def __init__(self, nodeconstants, dir=b'', text=b''):
        self._dir = dir
        self.nodeconstants = nodeconstants
        self._node = self.nodeconstants.nullid
        self._nodelen = self.nodeconstants.nodelen
        self._loadfunc = _noop
        self._copyfunc = _noop
        self._dirty = False
        self._dirs = {}
        self._lazydirs = {}
        # Using _lazymanifest here is a little slower than plain old dicts
        self._files = {}
        self._flags = {}
        if text:

            def readsubtree(subdir, subm):
                raise AssertionError(
                    b'treemanifest constructor only accepts flat manifests'
                )

            self.parse(text, readsubtree)
            self._dirty = True  # Mark flat manifest dirty after parsing

    def _subpath(self, path):
        return self._dir + path

    def _loadalllazy(self):
        selfdirs = self._dirs
        subpath = self._subpath
        for d, (node, readsubtree, docopy) in self._lazydirs.items():
            if docopy:
                selfdirs[d] = readsubtree(subpath(d), node).copy()
            else:
                selfdirs[d] = readsubtree(subpath(d), node)
        self._lazydirs = {}

    def _loadlazy(self, d):
        v = self._lazydirs.get(d)
        if v:
            node, readsubtree, docopy = v
            if docopy:
                self._dirs[d] = readsubtree(self._subpath(d), node).copy()
            else:
                self._dirs[d] = readsubtree(self._subpath(d), node)
            del self._lazydirs[d]

    def _loadchildrensetlazy(self, visit):
        if not visit:
            return None
        if visit == b'all' or visit == b'this':
            self._loadalllazy()
            return None

        loadlazy = self._loadlazy
        for k in visit:
            loadlazy(k + b'/')
        return visit

    def _loaddifflazy(self, t1, t2):
        """load items in t1 and t2 if they're needed for diffing.

        The current criteria are:
        - if it's not present in _lazydirs in either t1 or t2, load it in the
          other (it may already be loaded or it may not exist, doesn't matter)
        - if it's present in _lazydirs in both, compare the nodeid; if it
          differs, load it in both
        """
        toloadlazy = []
        for d, v1 in t1._lazydirs.items():
            v2 = t2._lazydirs.get(d)
            if not v2 or v2[0] != v1[0]:
                toloadlazy.append(d)
        for d, v1 in t2._lazydirs.items():
            if d not in t1._lazydirs:
                toloadlazy.append(d)

        for d in toloadlazy:
            t1._loadlazy(d)
            t2._loadlazy(d)

    def __len__(self):
        self._load()
        size = len(self._files)
        self._loadalllazy()
        for m in self._dirs.values():
            size += m.__len__()
        return size

    def __nonzero__(self):
        # Faster than "__len__() != 0" since it avoids loading sub-manifests
        return not self._isempty()

    __bool__ = __nonzero__

    def _isempty(self):
        self._load()  # for consistency; already loaded by all callers
        # See if we can skip loading everything.
        if self._files or (
            self._dirs and any(not m._isempty() for m in self._dirs.values())
        ):
            return False
        self._loadalllazy()
        return not self._dirs or all(m._isempty() for m in self._dirs.values())

    @encoding.strmethod
    def __repr__(self):
        return (
            b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
            % (
                self._dir,
                hex(self._node),
                bool(self._loadfunc is _noop),
                self._dirty,
                id(self),
            )
        )

    def dir(self):
        """The directory that this tree manifest represents, including a
        trailing '/'. Empty string for the repo root directory."""
        return self._dir

    def node(self):
        """The node of this instance. nullid for unsaved instances. Should
        be updated when the instance is read or written from a revlog.
        """
        assert not self._dirty
        return self._node

    def setnode(self, node):
        self._node = node
        self._dirty = False

    def iterentries(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n, self._flags.get(p, b'')
            else:
                for x in n.iterentries():
                    yield x

    def items(self):
        self._load()
        self._loadalllazy()
        for p, n in sorted(
            itertools.chain(self._dirs.items(), self._files.items())
        ):
            if p in self._files:
                yield self._subpath(p), n
            else:
                for f, sn in n.items():
                    yield f, sn

    iteritems = items

    def iterkeys(self):
        self._load()
        self._loadalllazy()
        for p in sorted(itertools.chain(self._dirs, self._files)):
            if p in self._files:
                yield self._subpath(p)
            else:
                for f in self._dirs[p]:
                    yield f

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __contains__(self, f):
        if f is None:
            return False
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return False

            return self._dirs[dir].__contains__(subpath)
        else:
            return f in self._files

    def get(self, f, default=None):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return default
            return self._dirs[dir].get(subpath, default)
        else:
            return self._files.get(f, default)

    def __getitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].__getitem__(subpath)
        else:
            return self._files[f]

    def flags(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            if dir not in self._dirs:
                return b''
            return self._dirs[dir].flags(subpath)
        else:
            if f in self._lazydirs or f in self._dirs:
                return b''
            return self._flags.get(f, b'')

    def find(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            return self._dirs[dir].find(subpath)
        else:
            return self._files[f], self._flags.get(f, b'')

    def __delitem__(self, f):
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)

            self._dirs[dir].__delitem__(subpath)
            # If the directory is now empty, remove it
            if self._dirs[dir]._isempty():
                del self._dirs[dir]
        else:
            del self._files[f]
            if f in self._flags:
                del self._flags[f]
        self._dirty = True

    def __setitem__(self, f, n):
        assert n is not None
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].__setitem__(subpath, n)
        else:
            # manifest nodes are either 20 bytes or 32 bytes,
            # depending on the hash in use. Assert this as historically
            # sometimes extra bytes were added.
            assert len(n) in (20, 32)
            self._files[f] = n
        self._dirty = True

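    # A minimal sketch of path-keyed access (hypothetical node values;
    # assumes a nodeconstants instance such as
    # mercurial.node.sha1nodeconstants):
    #
    #   tm = treemanifest(nodeconstants)
    #   tm[b'dir/sub/f.txt'] = b'\x01' * 20  # creates dir/ and dir/sub/
    #   assert tm.hasdir(b'dir/sub')
    #   assert tm[b'dir/sub/f.txt'] == b'\x01' * 20
    #   assert b'dir/sub/f.txt' in tm and len(tm) == 1
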
    def _load(self):
        if self._loadfunc is not _noop:
            lf, self._loadfunc = self._loadfunc, _noop
            lf(self)
        elif self._copyfunc is not _noop:
            cf, self._copyfunc = self._copyfunc, _noop
            cf(self)

    def setflag(self, f, flags):
        """Set the flags (symlink, executable) for path f."""
        if flags not in _manifestflags:
            raise TypeError(b"Invalid manifest flag set.")
        self._load()
        dir, subpath = _splittopdir(f)
        if dir:
            self._loadlazy(dir)
            if dir not in self._dirs:
                self._dirs[dir] = treemanifest(
                    self.nodeconstants, self._subpath(dir)
                )
            self._dirs[dir].setflag(subpath, flags)
        else:
            self._flags[f] = flags
        self._dirty = True

    def copy(self):
        copy = treemanifest(self.nodeconstants, self._dir)
        copy._node = self._node
        copy._dirty = self._dirty
        if self._copyfunc is _noop:

            def _copyfunc(s):
                self._load()
                s._lazydirs = {
                    d: (n, r, True) for d, (n, r, c) in self._lazydirs.items()
                }
                sdirs = s._dirs
                for d, v in self._dirs.items():
                    sdirs[d] = v.copy()
                s._files = dict.copy(self._files)
                s._flags = dict.copy(self._flags)

            if self._loadfunc is _noop:
                _copyfunc(copy)
            else:
                copy._copyfunc = _copyfunc
        else:
            copy._copyfunc = self._copyfunc
        return copy

    def filesnotin(self, m2, match=None):
        '''Set of files in this manifest that are not in the other'''
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.filesnotin(m2)

        files = set()

        def _filesnotin(t1, t2):
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)
            for d, m1 in t1._dirs.items():
                if d in t2._dirs:
                    m2 = t2._dirs[d]
                    _filesnotin(m1, m2)
                else:
                    files.update(m1.iterkeys())

            for fn in t1._files:
                if fn not in t2._files:
                    files.add(t1._subpath(fn))

        _filesnotin(self, m2)
        return files

    @propertycache
    def _alldirs(self):
        return pathutil.dirs(self)

    def dirs(self):
        return self._alldirs

    def hasdir(self, dir):
        self._load()
        topdir, subdir = _splittopdir(dir)
        if topdir:
            self._loadlazy(topdir)
            if topdir in self._dirs:
                return self._dirs[topdir].hasdir(subdir)
            return False
        dirslash = dir + b'/'
        return dirslash in self._dirs or dirslash in self._lazydirs

    def walk(self, match):
        """Generates matching file names.

        It also reports nonexistent files by marking them bad with match.bad().
        """
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        for fn in self._walk(match):
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            yield fn

        # for dirstate.walk, files=[''] means "walk the whole tree".
        # follow that here, too
        fset.discard(b'')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def _walk(self, match):
        '''Recursively generates matching file names for walk().'''
        visit = match.visitchildrenset(self._dir[:-1])
        if not visit:
            return

        # yield this dir's files and walk its submanifests
        self._load()
        visit = self._loadchildrensetlazy(visit)
        for p in sorted(list(self._dirs) + list(self._files)):
            if p in self._files:
                fullp = self._subpath(p)
                if match(fullp):
                    yield fullp
            else:
                if not visit or p[:-1] in visit:
                    for f in self._dirs[p]._walk(match):
                        yield f

    def _matches(self, match):
        """recursively generate a new manifest filtered by the match argument."""
        if match.always():
            return self.copy()
        return self._matches_inner(match)

    def _matches_inner(self, match):
        if match.always():
            return self.copy()

        visit = match.visitchildrenset(self._dir[:-1])
        if visit == b'all':
            return self.copy()
        ret = treemanifest(self.nodeconstants, self._dir)
        if not visit:
            return ret

        self._load()
        for fn in self._files:
            # While visitchildrenset *usually* lists only subdirs, this is
            # actually up to the matcher and may have some files in the set().
            # If visit == 'this', we should obviously look at the files in this
            # directory; if visit is a set, and fn is in it, we should inspect
            # fn (but no need to inspect things not in the set).
            if visit != b'this' and fn not in visit:
                continue
            fullp = self._subpath(fn)
            # visitchildrenset isn't perfect, we still need to call the regular
            # matcher code to further filter results.
            if not match(fullp):
                continue
            ret._files[fn] = self._files[fn]
            if fn in self._flags:
                ret._flags[fn] = self._flags[fn]

        visit = self._loadchildrensetlazy(visit)
        for dir, subm in self._dirs.items():
            if visit and dir[:-1] not in visit:
                continue
            m = subm._matches_inner(match)
            if not m._isempty():
                ret._dirs[dir] = m

        if not ret._isempty():
            ret._dirty = True
        return ret

    def fastdelta(self, base, changes):
        raise FastdeltaUnavailable()

    def diff(self, m2, match=None, clean=False):
        """Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        """
        if match and not match.always():
            m1 = self._matches(match)
            m2 = m2._matches(match)
            return m1.diff(m2, clean=clean)
        result = {}
        emptytree = treemanifest(self.nodeconstants)

        def _iterativediff(t1, t2, stack):
            """compare two tree manifests, appending the sub-manifests that
            still need to be compared to the stack"""
            if t1._node == t2._node and not t1._dirty and not t2._dirty:
                return
            t1._load()
            t2._load()
            self._loaddifflazy(t1, t2)

            for d, m1 in t1._dirs.items():
                m2 = t2._dirs.get(d, emptytree)
                stack.append((m1, m2))

            for d, m2 in t2._dirs.items():
                if d not in t1._dirs:
                    stack.append((emptytree, m2))

            for fn, n1 in t1._files.items():
                fl1 = t1._flags.get(fn, b'')
                n2 = t2._files.get(fn, None)
                fl2 = t2._flags.get(fn, b'')
                if n1 != n2 or fl1 != fl2:
                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
                elif clean:
                    result[t1._subpath(fn)] = None

            for fn, n2 in t2._files.items():
                if fn not in t1._files:
                    fl2 = t2._flags.get(fn, b'')
                    result[t2._subpath(fn)] = ((None, b''), (n2, fl2))

        stackls = []
        _iterativediff(self, m2, stackls)
        while stackls:
            t1, t2 = stackls.pop()
            # stackls is populated in the function call
            _iterativediff(t1, t2, stackls)
        return result

1322
1322
1323 def unmodifiedsince(self, m2):
1323 def unmodifiedsince(self, m2):
1324 return not self._dirty and not m2._dirty and self._node == m2._node
1324 return not self._dirty and not m2._dirty and self._node == m2._node
1325
1325
1326 def parse(self, text, readsubtree):
1326 def parse(self, text, readsubtree):
1327 selflazy = self._lazydirs
1327 selflazy = self._lazydirs
1328 for f, n, fl in _parse(self._nodelen, text):
1328 for f, n, fl in _parse(self._nodelen, text):
1329 if fl == b't':
1329 if fl == b't':
1330 f = f + b'/'
1330 f = f + b'/'
1331 # False below means "doesn't need to be copied" and can use the
1331 # False below means "doesn't need to be copied" and can use the
1332 # cached value from readsubtree directly.
1332 # cached value from readsubtree directly.
1333 selflazy[f] = (n, readsubtree, False)
1333 selflazy[f] = (n, readsubtree, False)
1334 elif b'/' in f:
1334 elif b'/' in f:
1335 # This is a flat manifest, so use __setitem__ and setflag rather
1335 # This is a flat manifest, so use __setitem__ and setflag rather
1336 # than assigning directly to _files and _flags, so we can
1336 # than assigning directly to _files and _flags, so we can
1337 # assign a path in a subdirectory, and to mark dirty (compared
1337 # assign a path in a subdirectory, and to mark dirty (compared
1338 # to nullid).
1338 # to nullid).
1339 self[f] = n
1339 self[f] = n
1340 if fl:
1340 if fl:
1341 self.setflag(f, fl)
1341 self.setflag(f, fl)
1342 else:
1342 else:
1343 # Assigning to _files and _flags avoids marking as dirty,
1343 # Assigning to _files and _flags avoids marking as dirty,
1344 # and should be a little faster.
1344 # and should be a little faster.
1345 self._files[f] = n
1345 self._files[f] = n
1346 if fl:
1346 if fl:
1347 self._flags[f] = fl
1347 self._flags[f] = fl
1348
1348
    def text(self):
        """Get the full data of this manifest as a bytestring."""
        self._load()
        return _text(self.iterentries())

    def dirtext(self):
        """Get the full data of this directory as a bytestring. Make sure that
        any submanifests have been written first, so their nodeids are correct.
        """
        self._load()
        flags = self.flags
        lazydirs = [(d[:-1], v[0], b't') for d, v in self._lazydirs.items()]
        dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
        files = [(f, self._files[f], flags(f)) for f in self._files]
        return _text(sorted(dirs + files + lazydirs))

    def read(self, gettext, readsubtree):
        def _load_for_read(s):
            s.parse(gettext(), readsubtree)
            s._dirty = False

        self._loadfunc = _load_for_read

    def writesubtrees(self, m1, m2, writesubtree, match):
        self._load()  # for consistency; should never have any effect here
        m1._load()
        m2._load()
        emptytree = treemanifest(self.nodeconstants)

        def getnode(m, d):
            ld = m._lazydirs.get(d)
            if ld:
                return ld[0]
            return m._dirs.get(d, emptytree)._node

        # let's skip investigating things that `match` says we do not need.
        visit = match.visitchildrenset(self._dir[:-1])
        visit = self._loadchildrensetlazy(visit)
        if visit == b'this' or visit == b'all':
            visit = None
        for d, subm in self._dirs.items():
            if visit and d[:-1] not in visit:
                continue
            subp1 = getnode(m1, d)
            subp2 = getnode(m2, d)
            if subp1 == self.nodeconstants.nullid:
                subp1, subp2 = subp2, subp1
            writesubtree(subm, subp1, subp2, match)

    def walksubtrees(self, matcher=None):
        """Returns an iterator of the subtrees of this manifest, including this
        manifest itself.

        If `matcher` is provided, it only returns subtrees that match.
        """
        if matcher and not matcher.visitdir(self._dir[:-1]):
            return
        if not matcher or matcher(self._dir[:-1]):
            yield self

        self._load()
        # OPT: use visitchildrenset to avoid loading everything.
        self._loadalllazy()
        for d, subm in self._dirs.items():
            for subtree in subm.walksubtrees(matcher=matcher):
                yield subtree


1416
1416
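# Illustrative sketch (not part of this changeset): one way a caller might
# use walksubtrees() above -- collect the directory name of every subtree a
# matcher visits. `tm` and `matcher` are hypothetical stand-ins for a real
# treemanifest and matcher object; dir() returning the subtree's directory
# name is assumed from the treemanifest API.
def _example_list_subtree_dirs(tm, matcher=None):
    # The root tree yields itself first; its dir() is b''.
    return [subtree.dir() for subtree in tm.walksubtrees(matcher=matcher)]

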
class manifestfulltextcache(util.lrucachedict):
    """File-backed LRU cache for the manifest cache

    File consists of entries, up to EOF:

    - 20 bytes node, 4 bytes length, <length> manifest data

    These are written in reverse cache order (oldest to newest).

    """

    _file = b'manifestfulltextcache'

    def __init__(self, max):
        super(manifestfulltextcache, self).__init__(max)
        self._dirty = False
        self._read = False
        self._opener = None

    def read(self):
        if self._read or self._opener is None:
            return

        try:
            with self._opener(self._file) as fp:
                set = super(manifestfulltextcache, self).__setitem__
                # ignore trailing data, this is a cache, corruption is skipped
                while True:
                    # TODO do we need to do work here for sha1 portability?
                    node = fp.read(20)
                    if len(node) < 20:
                        break
                    try:
                        size = struct.unpack(b'>L', fp.read(4))[0]
                    except struct.error:
                        break
                    value = bytearray(fp.read(size))
                    if len(value) != size:
                        break
                    set(node, value)
        except IOError:
            # the file is allowed to be missing
            pass

        self._read = True
        self._dirty = False

    def write(self):
        if not self._dirty or self._opener is None:
            return
        # rotate backwards to the first used node
        try:
            with self._opener(
                self._file, b'w', atomictemp=True, checkambig=True
            ) as fp:
                node = self._head.prev
                while True:
                    if node.key in self._cache:
                        fp.write(node.key)
                        fp.write(struct.pack(b'>L', len(node.value)))
                        fp.write(node.value)
                    if node is self._head:
                        break
                    node = node.prev
        except IOError:
            # We could not write the cache (e.g. a permission error); the
            # content can simply be missing.
            #
            # We could try harder and see if we could recreate a wcache
            # directory where we could write to.
            #
            # XXX the error passes silently; having some way to issue an
            # error log (`ui.log`) would be nice.
            pass

    def __len__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__len__()

    def __contains__(self, k):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__contains__(k)

    def __iter__(self):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).__iter__()

    def __getitem__(self, k):
        if not self._read:
            self.read()
        # the cache lru order can change on read
        setdirty = self._cache.get(k) is not self._head
        value = super(manifestfulltextcache, self).__getitem__(k)
        if setdirty:
            self._dirty = True
        return value

    def __setitem__(self, k, v):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__setitem__(k, v)
        self._dirty = True

    def __delitem__(self, k):
        if not self._read:
            self.read()
        super(manifestfulltextcache, self).__delitem__(k)
        self._dirty = True

    def get(self, k, default=None):
        if not self._read:
            self.read()
        return super(manifestfulltextcache, self).get(k, default=default)

    def clear(self, clear_persisted_data=False):
        super(manifestfulltextcache, self).clear()
        if clear_persisted_data:
            self._dirty = True
            self.write()
        self._read = False


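# Illustrative sketch (not part of this changeset): the on-disk format read
# and written by manifestfulltextcache above is simply a sequence of
# `20-byte node | '>L' big-endian length | <length> bytes of data` records
# until EOF. A minimal encoder/decoder pair under that assumption (`struct`
# is already imported at the top of this module):
def _example_pack_record(node, data):
    assert len(node) == 20  # sha1-sized nodes, as the reader expects
    return node + struct.pack(b'>L', len(data)) + bytes(data)


def _example_unpack_records(blob):
    records, offset = [], 0
    while offset + 24 <= len(blob):
        node = blob[offset : offset + 20]
        (size,) = struct.unpack(b'>L', blob[offset + 20 : offset + 24])
        data = blob[offset + 24 : offset + 24 + size]
        if len(data) != size:
            break  # truncated trailing record: skip it, like read() does
        records.append((node, data))
        offset += 24 + size
    return records

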
# an upper bound of what we expect from compression
# (real-life value seems to be "3")
MAXCOMPRESSION = 3


class FastdeltaUnavailable(Exception):
    """Exception raised when fastdelta isn't usable on a manifest."""


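# Illustrative sketch (not part of this changeset): FastdeltaUnavailable is
# a control-flow signal -- manifestrevlog.add() below raises it when the
# cheap delta path cannot be taken, and the except clause falls back to a
# fulltext write. The bare pattern, with hypothetical callables:
def _example_fastdelta_or_fulltext(fast_path, slow_path):
    try:
        return fast_path()  # may raise FastdeltaUnavailable
    except FastdeltaUnavailable:
        return slow_path()

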
@interfaceutil.implementer(repository.imanifeststorage)
class manifestrevlog:
    """A revlog that stores manifest texts. This is responsible for caching the
    full-text manifest contents.
    """

    def __init__(
        self,
        nodeconstants,
        opener,
        tree=b'',
        dirlogcache=None,
        treemanifest=False,
    ):
        """Constructs a new manifest revlog

        `indexfile` - used by extensions to have two manifests at once, like
        when transitioning between flat manifests and treemanifests.

        `treemanifest` - used to indicate this is a tree manifest revlog. Opener
        options can also be used to make this a tree manifest revlog. The opener
        option takes precedence, so if it is set to True, we ignore whatever
        value is passed in to the constructor.
        """
        self.nodeconstants = nodeconstants
        # During normal operations, we expect to deal with not more than four
        # revs at a time (such as during commit --amend). When rebasing large
        # stacks of commits, the number can go up, hence the config knob below.
        cachesize = 4
        optiontreemanifest = False
        opts = getattr(opener, 'options', None)
        if opts is not None:
            cachesize = opts.get(b'manifestcachesize', cachesize)
            optiontreemanifest = opts.get(b'treemanifest', False)

        self._treeondisk = optiontreemanifest or treemanifest

        self._fulltextcache = manifestfulltextcache(cachesize)

        if tree:
            assert self._treeondisk, (tree, b'opts is %r' % opts)

        radix = b'00manifest'
        if tree:
            radix = b"meta/" + tree + radix

        self.tree = tree

        # The dirlogcache is kept on the root manifest log
        if tree:
            self._dirlogcache = dirlogcache
        else:
            self._dirlogcache = {b'': self}

        self._revlog = revlog.revlog(
            opener,
            target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
            radix=radix,
            # only root indexfile is cached
            checkambig=not bool(tree),
            mmaplargeindex=True,
            upperboundcomp=MAXCOMPRESSION,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        self.index = self._revlog.index
        self._generaldelta = self._revlog._generaldelta

    def get_revlog(self):
        """return an actual revlog instance if any

        This exists because a lot of code leverages the fact that the
        underlying storage is a revlog for optimization, so giving a simple
        way to access the revlog instance helps such code.
        """
        return self._revlog

    def _setupmanifestcachehooks(self, repo):
        """Persist the manifestfulltextcache on lock release"""
        if not hasattr(repo, '_wlockref'):
            return

        self._fulltextcache._opener = repo.wcachevfs
        if repo._currentlock(repo._wlockref) is None:
            return

        reporef = weakref.ref(repo)
        manifestrevlogref = weakref.ref(self)

        def persistmanifestcache(success):
            # Repo is in an unknown state, do not persist.
            if not success:
                return

            repo = reporef()
            self = manifestrevlogref()
            if repo is None or self is None:
                return
            if repo.manifestlog.getstorage(b'') is not self:
                # there's a different manifest in play now, abort
                return
            self._fulltextcache.write()

        repo._afterlock(persistmanifestcache)

    @property
    def fulltextcache(self):
        return self._fulltextcache

    def clearcaches(self, clear_persisted_data=False):
        self._revlog.clearcaches()
        self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
        self._dirlogcache = {self.tree: self}

    def dirlog(self, d):
        if d:
            assert self._treeondisk
        if d not in self._dirlogcache:
            mfrevlog = manifestrevlog(
                self.nodeconstants,
                self.opener,
                d,
                self._dirlogcache,
                treemanifest=self._treeondisk,
            )
            self._dirlogcache[d] = mfrevlog
        return self._dirlogcache[d]

    def add(
        self,
        m,
        transaction,
        link,
        p1,
        p2,
        added,
        removed,
        readtree=None,
        match=None,
    ):
        """add some manifest entry into the manifest log

        input:

        m: the manifest dict we want to store
        transaction: the open transaction
        p1: manifest-node of p1
        p2: manifest-node of p2
        added: files added/changed compared to parent
        removed: files removed compared to parent

        tree manifest input:

        readtree: a function to read a subtree
        match: a filematcher for the subpart of the tree manifest
        """
        try:
            if p1 not in self.fulltextcache:
                raise FastdeltaUnavailable()
            # If our first parent is in the manifest cache, we can
            # compute a delta here using properties we know about the
            # manifest up-front, which may save time later for the
            # revlog layer.

            _checkforbidden(added)
            # combine the changed lists into one sorted iterator
            work = heapq.merge(
                [(x, False) for x in sorted(added)],
                [(x, True) for x in sorted(removed)],
            )

            arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
            cachedelta = self._revlog.rev(p1), deltatext
            text = util.buffer(arraytext)
            rev = self._revlog.addrevision(
                text, transaction, link, p1, p2, cachedelta
            )
            n = self._revlog.node(rev)
        except FastdeltaUnavailable:
            # The first parent manifest isn't already loaded or the
            # manifest implementation doesn't support fastdelta, so
            # we'll just encode a fulltext of the manifest and pass
            # that through to the revlog layer, and let it handle the
            # delta process.
            if self._treeondisk:
                assert readtree, b"readtree must be set for treemanifest writes"
                assert match, b"match must be specified for treemanifest writes"
                m1 = readtree(self.tree, p1)
                m2 = readtree(self.tree, p2)
                n = self._addtree(
                    m, transaction, link, m1, m2, readtree, match=match
                )
                arraytext = None
            else:
                text = m.text()
                rev = self._revlog.addrevision(text, transaction, link, p1, p2)
                n = self._revlog.node(rev)
                arraytext = bytearray(text)

        if arraytext is not None:
            self.fulltextcache[n] = arraytext

        return n

    def _addtree(self, m, transaction, link, m1, m2, readtree, match):
        # If the manifest is unchanged compared to one parent,
        # don't write a new revision
        if self.tree != b'' and (
            m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
        ):
            return m.node()

        def writesubtree(subm, subp1, subp2, match):
            sublog = self.dirlog(subm.dir())
            sublog.add(
                subm,
                transaction,
                link,
                subp1,
                subp2,
                None,
                None,
                readtree=readtree,
                match=match,
            )

        m.writesubtrees(m1, m2, writesubtree, match)
        text = m.dirtext()
        n = None
        if self.tree != b'':
            # Double-check whether contents are unchanged to one parent
            if text == m1.dirtext():
                n = m1.node()
            elif text == m2.dirtext():
                n = m2.node()

        if not n:
            rev = self._revlog.addrevision(
                text, transaction, link, m1.node(), m2.node()
            )
            n = self._revlog.node(rev)

        # Save nodeid so parent manifest can calculate its nodeid
        m.setnode(n)
        return n

    def __len__(self):
        return len(self._revlog)

    def __iter__(self):
        return self._revlog.__iter__()

    def rev(self, node):
        return self._revlog.rev(node)

    def node(self, rev):
        return self._revlog.node(rev)

    def lookup(self, value):
        return self._revlog.lookup(value)

    def parentrevs(self, rev):
        return self._revlog.parentrevs(rev)

    def parents(self, node):
        return self._revlog.parents(node)

    def linkrev(self, rev):
        return self._revlog.linkrev(rev)

    def checksize(self):
        return self._revlog.checksize()

-    def revision(self, node, _df=None):
-        return self._revlog.revision(node, _df=_df)
+    def revision(self, node):
+        return self._revlog.revision(node)

    def rawdata(self, node, _df=None):
        return self._revlog.rawdata(node, _df=_df)

    def revdiff(self, rev1, rev2):
        return self._revlog.revdiff(rev1, rev2)

    def cmp(self, node, text):
        return self._revlog.cmp(node, text)

    def deltaparent(self, rev):
        return self._revlog.deltaparent(rev)

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        return self._revlog.emitrevisions(
            nodes,
            nodesorder=nodesorder,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        return self._revlog.addgroup(
            deltas,
            linkmapper,
            transaction,
            alwayscache=alwayscache,
            addrevisioncb=addrevisioncb,
            duplicaterevisioncb=duplicaterevisioncb,
            debug_info=debug_info,
            delta_base_reuse_policy=delta_base_reuse_policy,
        )

    def rawsize(self, rev):
        return self._revlog.rawsize(rev)

    def getstrippoint(self, minlink):
        return self._revlog.getstrippoint(minlink)

    def strip(self, minlink, transaction):
        return self._revlog.strip(minlink, transaction)

    def files(self):
        return self._revlog.files()

    def clone(self, tr, destrevlog, **kwargs):
        if not isinstance(destrevlog, manifestrevlog):
            raise error.ProgrammingError(b'expected manifestrevlog to clone()')

        return self._revlog.clone(tr, destrevlog._revlog, **kwargs)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        return self._revlog.storageinfo(
            exclusivefiles=exclusivefiles,
            sharedfiles=sharedfiles,
            revisionscount=revisionscount,
            trackedsize=trackedsize,
            storedsize=storedsize,
        )

    @property
    def opener(self):
        return self._revlog.opener

    @opener.setter
    def opener(self, value):
        self._revlog.opener = value


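# Illustrative sketch (not part of this changeset): manifestrevlog.add()
# above merges the sorted `added` and `removed` lists into one ordered work
# list for fastdelta. The same pattern in isolation (`heapq` is already
# imported at the top of this module):
def _example_merge_changes(added, removed):
    work = heapq.merge(
        [(x, False) for x in sorted(added)],
        [(x, True) for x in sorted(removed)],
    )
    return list(work)


# _example_merge_changes([b'b.txt', b'a.txt'], [b'c.txt'])
# -> [(b'a.txt', False), (b'b.txt', False), (b'c.txt', True)]

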
@interfaceutil.implementer(repository.imanifestlog)
class manifestlog:
    """A collection class representing the collection of manifest snapshots
    referenced by commits in the repository.

    In this situation, 'manifest' refers to the abstract concept of a snapshot
    of the list of files in the given commit. Consumers of the output of this
    class do not care about the implementation details of the actual manifests
    they receive (i.e. tree or flat or lazily loaded, etc)."""

    def __init__(self, opener, repo, rootstore, narrowmatch):
        self.nodeconstants = repo.nodeconstants
        usetreemanifest = False
        cachesize = 4

        opts = getattr(opener, 'options', None)
        if opts is not None:
            usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
            cachesize = opts.get(b'manifestcachesize', cachesize)

        self._treemanifests = usetreemanifest

        self._rootstore = rootstore
        self._rootstore._setupmanifestcachehooks(repo)
        self._narrowmatch = narrowmatch

        # A cache of the manifestctx or treemanifestctx for each directory
        self._dirmancache = {}
        self._dirmancache[b''] = util.lrucachedict(cachesize)

        self._cachesize = cachesize

    def __getitem__(self, node):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.
        """
        return self.get(b'', node)

    def get(self, tree, node, verify=True):
        """Retrieves the manifest instance for the given node. Throws a
        LookupError if not found.

        `verify` - if True an exception will be thrown if the node is not in
        the revlog
        """
        if node in self._dirmancache.get(tree, ()):
            return self._dirmancache[tree][node]

        if not self._narrowmatch.always():
            if not self._narrowmatch.visitdir(tree[:-1]):
                return excludeddirmanifestctx(self.nodeconstants, tree, node)
        if tree:
            if self._rootstore._treeondisk:
                if verify:
                    # Side-effect is LookupError is raised if node doesn't
                    # exist.
                    self.getstorage(tree).rev(node)

                m = treemanifestctx(self, tree, node)
            else:
                raise error.Abort(
                    _(
                        b"cannot ask for manifest directory '%s' in a flat "
                        b"manifest"
                    )
                    % tree
                )
        else:
            if verify:
                # Side-effect is LookupError is raised if node doesn't exist.
                self._rootstore.rev(node)

            if self._treemanifests:
                m = treemanifestctx(self, b'', node)
            else:
                m = manifestctx(self, node)

        if node != self.nodeconstants.nullid:
            mancache = self._dirmancache.get(tree)
            if not mancache:
                mancache = util.lrucachedict(self._cachesize)
                self._dirmancache[tree] = mancache
            mancache[node] = m
        return m

    def getstorage(self, tree):
        return self._rootstore.dirlog(tree)

    def clearcaches(self, clear_persisted_data=False):
        self._dirmancache.clear()
        self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)

    def rev(self, node):
        return self._rootstore.rev(node)

    def update_caches(self, transaction):
        return self._rootstore._revlog.update_caches(transaction=transaction)


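# Illustrative sketch (not part of this changeset): typical read-side use of
# manifestlog. `repo` is a hypothetical stand-in for a real repository
# object; `node` is a manifest node as recorded on a changelog entry.
def _example_read_manifest(repo, node):
    mctx = repo.manifestlog[node]  # a manifestctx (or treemanifestctx)
    mf = mctx.read()  # a manifestdict (or treemanifest)
    return sorted(mf)  # iterating a manifest yields its file paths

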
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memmanifestctx:
    def __init__(self, manifestlog):
        self._manifestlog = manifestlog
        self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def copy(self):
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    def read(self):
        return self._manifestdict

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        return self._storage().add(
            self._manifestdict,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            match=match,
        )


@interfaceutil.implementer(repository.imanifestrevisionstored)
class manifestctx:
    """A class representing a single revision of a manifest, including its
    contents, its parent revs, and its linkrev.
    """

    def __init__(self, manifestlog, node):
        self._manifestlog = manifestlog
        self._data = None

        self._node = node

        # TODO: We eventually want p1, p2, and linkrev exposed on this class,
        # but let's add it later when something needs it and we can load it
        # lazily.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def node(self):
        return self._node

    def copy(self):
        memmf = memmanifestctx(self._manifestlog)
        memmf._manifestdict = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def read(self):
        if self._data is None:
            nc = self._manifestlog.nodeconstants
            if self._node == nc.nullid:
                self._data = manifestdict(nc.nodelen)
            else:
                store = self._storage()
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = manifestdict(nc.nodelen, text)
        return self._data

    def readfast(self, shallow=False):
        """Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, nothing changes since this is a flat manifest.
        """
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta()
        return self.read()

    def readdelta(self, shallow=False):
        """Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        Changing the value of `shallow` has no effect on flat manifests.
        """
        store = self._storage()
        r = store.rev(self._node)
        d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
        return manifestdict(store.nodeconstants.nodelen, d)

    def find(self, key):
        return self.read().find(key)


@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memtreemanifestctx:
    def __init__(self, manifestlog, dir=b''):
        self._manifestlog = manifestlog
        self._dir = dir
        self._treemanifest = treemanifest(manifestlog.nodeconstants)

    def _storage(self):
        return self._manifestlog.getstorage(b'')

    def copy(self):
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self._treemanifest.copy()
        return memmf

    def read(self):
        return self._treemanifest

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        def readtree(dir, node):
            return self._manifestlog.get(dir, node).read()

        return self._storage().add(
            self._treemanifest,
            transaction,
            link,
            p1,
            p2,
            added,
            removed,
            readtree=readtree,
            match=match,
        )


@interfaceutil.implementer(repository.imanifestrevisionstored)
class treemanifestctx:
    def __init__(self, manifestlog, dir, node):
        self._manifestlog = manifestlog
        self._dir = dir
        self._data = None

        self._node = node

        # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
        # we can instantiate treemanifestctx objects for directories we don't
        # have on disk.
        # self.p1, self.p2 = store.parents(node)
        # rev = store.rev(node)
        # self.linkrev = store.linkrev(rev)

    def _storage(self):
        narrowmatch = self._manifestlog._narrowmatch
        if not narrowmatch.always():
            if not narrowmatch.visitdir(self._dir[:-1]):
                return excludedmanifestrevlog(
                    self._manifestlog.nodeconstants, self._dir
                )
        return self._manifestlog.getstorage(self._dir)

    def read(self):
        if self._data is None:
            store = self._storage()
            if self._node == self._manifestlog.nodeconstants.nullid:
                self._data = treemanifest(self._manifestlog.nodeconstants)
            # TODO accessing non-public API
            elif store._treeondisk:
                m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)

                def gettext():
                    return store.revision(self._node)

                def readsubtree(dir, subm):
                    # Set verify to False since we need to be able to create
                    # subtrees for trees that don't exist on disk.
                    return self._manifestlog.get(dir, subm, verify=False).read()

                m.read(gettext, readsubtree)
                m.setnode(self._node)
                self._data = m
            else:
                if self._node in store.fulltextcache:
                    text = pycompat.bytestr(store.fulltextcache[self._node])
                else:
                    text = store.revision(self._node)
                    arraytext = bytearray(text)
                    store.fulltextcache[self._node] = arraytext
                self._data = treemanifest(
                    self._manifestlog.nodeconstants, dir=self._dir, text=text
                )

        return self._data

    def node(self):
        return self._node

    def copy(self):
        memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
        memmf._treemanifest = self.read().copy()
        return memmf

    @propertycache
    def parents(self):
        return self._storage().parents(self._node)

    def readdelta(self, shallow=False):
        """Returns a manifest containing just the entries that are present
        in this manifest, but not in its p1 manifest. This is efficient to read
        if the revlog delta is already p1.

        If `shallow` is True, this will read the delta for this directory,
        without recursively reading subdirectory manifests. Instead, any
        subdirectory entry will be reported as it appears in the manifest, i.e.
        the subdirectory will be reported among files and distinguished only by
        its 't' flag.
        """
        store = self._storage()
        if shallow:
            r = store.rev(self._node)
            d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
            return manifestdict(store.nodeconstants.nodelen, d)
        else:
            # Need to perform a slow delta
            r0 = store.deltaparent(store.rev(self._node))
            m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
            m1 = self.read()
            md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
            for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
                if n1:
                    md[f] = n1
                    if fl1:
                        md.setflag(f, fl1)
            return md

    def readfast(self, shallow=False):
        """Calls either readdelta or read, based on which would be less work.
        readdelta is called if the delta is against the p1, and therefore can be
        read quickly.

        If `shallow` is True, it only returns the entries from this manifest,
        and not any submanifests.
        """
        store = self._storage()
        r = store.rev(self._node)
        deltaparent = store.deltaparent(r)
        if deltaparent != nullrev and deltaparent in store.parentrevs(r):
            return self.readdelta(shallow=shallow)

        if shallow:
            return manifestdict(
                store.nodeconstants.nodelen, store.revision(self._node)
            )
        else:
            return self.read()

    def find(self, key):
        return self.read().find(key)


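# Illustrative sketch (not part of this changeset): manifest.diff() -- used
# by the slow-delta path of treemanifestctx.readdelta() above -- maps each
# changed path to ((old_node, old_flags), (new_node, new_flags)), with a
# None node standing in for the missing side of an addition or removal.
# Under that assumption, a diff can be split into the three cases like this:
def _example_split_diff(m0, m1):
    added, removed, changed = [], [], []
    for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).items():
        if n0 is None:
            added.append(f)
        elif n1 is None:
            removed.append(f)
        else:
            changed.append(f)
    return added, removed, changed

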
class excludeddir(treemanifest):
    """Stand-in for a directory that is excluded from the repository.

    With narrowing active on a repository that uses treemanifests,
    some of the directory revlogs will be excluded from the resulting
    clone. This is a huge storage win for clients, but means we need
    some sort of pseudo-manifest to surface to internals so we can
    detect a merge conflict outside the narrowspec. That's what this
    class is: it stands in for a directory whose node is known, but
    whose contents are unknown.
    """

    def __init__(self, nodeconstants, dir, node):
        super(excludeddir, self).__init__(nodeconstants, dir)
        self._node = node
        # Add an empty file, which will be included by iterators and such,
        # appearing as the directory itself (i.e. something like "dir/")
        self._files[b''] = node
        self._flags[b''] = b't'

    # Manifests outside the narrowspec should never be modified, so avoid
    # copying. This makes a noticeable difference when there are very many
    # directories outside the narrowspec. Also, it makes sense for the copy to
    # be of the same type as the original, which would not happen with the
    # super type's copy().
    def copy(self):
        return self


class excludeddirmanifestctx(treemanifestctx):
    """context wrapper for excludeddir - see that docstring for rationale"""

    def __init__(self, nodeconstants, dir, node):
        self.nodeconstants = nodeconstants
        self._dir = dir
        self._node = node

    def read(self):
        return excludeddir(self.nodeconstants, self._dir, self._node)

    def readfast(self, shallow=False):
        # special version of readfast since we don't have underlying storage
        return self.read()

    def write(self, *args):
        raise error.ProgrammingError(
            b'attempt to write manifest from excluded dir %s' % self._dir
        )


2342 class excludedmanifestrevlog(manifestrevlog):
2342 class excludedmanifestrevlog(manifestrevlog):
2343 """Stand-in for excluded treemanifest revlogs.
2343 """Stand-in for excluded treemanifest revlogs.
2344
2344
2345 When narrowing is active on a treemanifest repository, we'll have
2345 When narrowing is active on a treemanifest repository, we'll have
2346 references to directories we can't see due to the revlog being
2346 references to directories we can't see due to the revlog being
2347 skipped. This class exists to conform to the manifestrevlog
2347 skipped. This class exists to conform to the manifestrevlog
2348 interface for those directories and proactively prevent writes
2348 interface for those directories and proactively prevent writes
2349 outside the narrowspec.
2349 outside the narrowspec.
2350 """
2350 """
2351
2351
2352 def __init__(self, nodeconstants, dir):
2352 def __init__(self, nodeconstants, dir):
2353 self.nodeconstants = nodeconstants
2353 self.nodeconstants = nodeconstants
2354 self._dir = dir
2354 self._dir = dir
2355
2355
2356 def __len__(self):
2356 def __len__(self):
2357 raise error.ProgrammingError(
2357 raise error.ProgrammingError(
2358 b'attempt to get length of excluded dir %s' % self._dir
2358 b'attempt to get length of excluded dir %s' % self._dir
2359 )
2359 )
2360
2360
2361 def rev(self, node):
2361 def rev(self, node):
2362 raise error.ProgrammingError(
2362 raise error.ProgrammingError(
2363 b'attempt to get rev from excluded dir %s' % self._dir
2363 b'attempt to get rev from excluded dir %s' % self._dir
2364 )
2364 )
2365
2365
2366 def linkrev(self, node):
2366 def linkrev(self, node):
2367 raise error.ProgrammingError(
2367 raise error.ProgrammingError(
2368 b'attempt to get linkrev from excluded dir %s' % self._dir
2368 b'attempt to get linkrev from excluded dir %s' % self._dir
2369 )
2369 )
2370
2370
2371 def node(self, rev):
2371 def node(self, rev):
2372 raise error.ProgrammingError(
2372 raise error.ProgrammingError(
2373 b'attempt to get node from excluded dir %s' % self._dir
2373 b'attempt to get node from excluded dir %s' % self._dir
2374 )
2374 )
2375
2375
2376 def add(self, *args, **kwargs):
2376 def add(self, *args, **kwargs):
2377 # We should never write entries in dirlogs outside the narrow clone.
2377 # We should never write entries in dirlogs outside the narrow clone.
2378 # However, the method still gets called from writesubtree() in
2378 # However, the method still gets called from writesubtree() in
2379 # _addtree(), so we need to handle it. We should possibly make
2379 # _addtree(), so we need to handle it. We should possibly make
2380 # writesubtree() avoid calling add() with a clean manifest (_dirty
2380 # writesubtree() avoid calling add() with a clean manifest (_dirty
2381 # is always False in excludeddir instances).
2381 # is always False in excludeddir instances).
2382 pass
2382 pass
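
To make the guard pattern above concrete, here is a minimal toy sketch
(illustrative names only, not the Mercurial API): copies share the
instance and any write fails loudly, mirroring how the excluded* classes
behave.

    class ToyProgrammingError(Exception):
        """toy stand-in for mercurial.error.ProgrammingError"""

    class ToyExcludedDir:
        def __init__(self, dirname, node):
            self._dir = dirname
            self._node = node

        def copy(self):
            # contents outside the narrowspec are immutable; share, don't copy
            return self

        def write(self, *args):
            raise ToyProgrammingError(
                'attempt to write manifest from excluded dir %s' % self._dir
            )
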
@@ -1,3551 +1,3549 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import io
19 import io
20 import os
20 import os
21 import struct
21 import struct
22 import weakref
22 import weakref
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .revlogutils.constants import (
35 from .revlogutils.constants import (
36 ALL_KINDS,
36 ALL_KINDS,
37 CHANGELOGV2,
37 CHANGELOGV2,
38 COMP_MODE_DEFAULT,
38 COMP_MODE_DEFAULT,
39 COMP_MODE_INLINE,
39 COMP_MODE_INLINE,
40 COMP_MODE_PLAIN,
40 COMP_MODE_PLAIN,
41 DELTA_BASE_REUSE_NO,
41 DELTA_BASE_REUSE_NO,
42 DELTA_BASE_REUSE_TRY,
42 DELTA_BASE_REUSE_TRY,
43 ENTRY_RANK,
43 ENTRY_RANK,
44 FEATURES_BY_VERSION,
44 FEATURES_BY_VERSION,
45 FLAG_GENERALDELTA,
45 FLAG_GENERALDELTA,
46 FLAG_INLINE_DATA,
46 FLAG_INLINE_DATA,
47 INDEX_HEADER,
47 INDEX_HEADER,
48 KIND_CHANGELOG,
48 KIND_CHANGELOG,
49 KIND_FILELOG,
49 KIND_FILELOG,
50 RANK_UNKNOWN,
50 RANK_UNKNOWN,
51 REVLOGV0,
51 REVLOGV0,
52 REVLOGV1,
52 REVLOGV1,
53 REVLOGV1_FLAGS,
53 REVLOGV1_FLAGS,
54 REVLOGV2,
54 REVLOGV2,
55 REVLOGV2_FLAGS,
55 REVLOGV2_FLAGS,
56 REVLOG_DEFAULT_FLAGS,
56 REVLOG_DEFAULT_FLAGS,
57 REVLOG_DEFAULT_FORMAT,
57 REVLOG_DEFAULT_FORMAT,
58 REVLOG_DEFAULT_VERSION,
58 REVLOG_DEFAULT_VERSION,
59 SUPPORTED_FLAGS,
59 SUPPORTED_FLAGS,
60 )
60 )
61 from .revlogutils.flagutil import (
61 from .revlogutils.flagutil import (
62 REVIDX_DEFAULT_FLAGS,
62 REVIDX_DEFAULT_FLAGS,
63 REVIDX_ELLIPSIS,
63 REVIDX_ELLIPSIS,
64 REVIDX_EXTSTORED,
64 REVIDX_EXTSTORED,
65 REVIDX_FLAGS_ORDER,
65 REVIDX_FLAGS_ORDER,
66 REVIDX_HASCOPIESINFO,
66 REVIDX_HASCOPIESINFO,
67 REVIDX_ISCENSORED,
67 REVIDX_ISCENSORED,
68 REVIDX_RAWTEXT_CHANGING_FLAGS,
68 REVIDX_RAWTEXT_CHANGING_FLAGS,
69 )
69 )
70 from .thirdparty import attr
70 from .thirdparty import attr
71 from . import (
71 from . import (
72 ancestor,
72 ancestor,
73 dagop,
73 dagop,
74 error,
74 error,
75 mdiff,
75 mdiff,
76 policy,
76 policy,
77 pycompat,
77 pycompat,
78 revlogutils,
78 revlogutils,
79 templatefilters,
79 templatefilters,
80 util,
80 util,
81 )
81 )
82 from .interfaces import (
82 from .interfaces import (
83 repository,
83 repository,
84 util as interfaceutil,
84 util as interfaceutil,
85 )
85 )
86 from .revlogutils import (
86 from .revlogutils import (
87 deltas as deltautil,
87 deltas as deltautil,
88 docket as docketutil,
88 docket as docketutil,
89 flagutil,
89 flagutil,
90 nodemap as nodemaputil,
90 nodemap as nodemaputil,
91 randomaccessfile,
91 randomaccessfile,
92 revlogv0,
92 revlogv0,
93 rewrite,
93 rewrite,
94 sidedata as sidedatautil,
94 sidedata as sidedatautil,
95 )
95 )
96 from .utils import (
96 from .utils import (
97 storageutil,
97 storageutil,
98 stringutil,
98 stringutil,
99 )
99 )
100
100
101 # blanked usage of all the names to prevent pyflakes complaints
101 # blanked usage of all the names to prevent pyflakes complaints
102 # We need these names available in the module for extensions.
102 # We need these names available in the module for extensions.
103
103
104 REVLOGV0
104 REVLOGV0
105 REVLOGV1
105 REVLOGV1
106 REVLOGV2
106 REVLOGV2
107 CHANGELOGV2
107 CHANGELOGV2
108 FLAG_INLINE_DATA
108 FLAG_INLINE_DATA
109 FLAG_GENERALDELTA
109 FLAG_GENERALDELTA
110 REVLOG_DEFAULT_FLAGS
110 REVLOG_DEFAULT_FLAGS
111 REVLOG_DEFAULT_FORMAT
111 REVLOG_DEFAULT_FORMAT
112 REVLOG_DEFAULT_VERSION
112 REVLOG_DEFAULT_VERSION
113 REVLOGV1_FLAGS
113 REVLOGV1_FLAGS
114 REVLOGV2_FLAGS
114 REVLOGV2_FLAGS
115 REVIDX_ISCENSORED
115 REVIDX_ISCENSORED
116 REVIDX_ELLIPSIS
116 REVIDX_ELLIPSIS
117 REVIDX_HASCOPIESINFO
117 REVIDX_HASCOPIESINFO
118 REVIDX_EXTSTORED
118 REVIDX_EXTSTORED
119 REVIDX_DEFAULT_FLAGS
119 REVIDX_DEFAULT_FLAGS
120 REVIDX_FLAGS_ORDER
120 REVIDX_FLAGS_ORDER
121 REVIDX_RAWTEXT_CHANGING_FLAGS
121 REVIDX_RAWTEXT_CHANGING_FLAGS
122
122
123 parsers = policy.importmod('parsers')
123 parsers = policy.importmod('parsers')
124 rustancestor = policy.importrust('ancestor')
124 rustancestor = policy.importrust('ancestor')
125 rustdagop = policy.importrust('dagop')
125 rustdagop = policy.importrust('dagop')
126 rustrevlog = policy.importrust('revlog')
126 rustrevlog = policy.importrust('revlog')
127
127
128 # Aliased for performance.
128 # Aliased for performance.
129 _zlibdecompress = zlib.decompress
129 _zlibdecompress = zlib.decompress
130
130
131 # max size of inline data embedded into a revlog
131 # max size of inline data embedded into a revlog
132 _maxinline = 131072
132 _maxinline = 131072
133
133
134 # Flag processors for REVIDX_ELLIPSIS.
134 # Flag processors for REVIDX_ELLIPSIS.
135 def ellipsisreadprocessor(rl, text):
135 def ellipsisreadprocessor(rl, text):
136 return text, False
136 return text, False
137
137
138
138
139 def ellipsiswriteprocessor(rl, text):
139 def ellipsiswriteprocessor(rl, text):
140 return text, False
140 return text, False
141
141
142
142
143 def ellipsisrawprocessor(rl, text):
143 def ellipsisrawprocessor(rl, text):
144 return False
144 return False
145
145
146
146
147 ellipsisprocessor = (
147 ellipsisprocessor = (
148 ellipsisreadprocessor,
148 ellipsisreadprocessor,
149 ellipsiswriteprocessor,
149 ellipsiswriteprocessor,
150 ellipsisrawprocessor,
150 ellipsisrawprocessor,
151 )
151 )
152
152
153
153
154 def _verify_revision(rl, skipflags, state, node):
154 def _verify_revision(rl, skipflags, state, node):
155 """Verify the integrity of the given revlog ``node`` while providing a hook
155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 point for extensions to influence the operation."""
156 point for extensions to influence the operation."""
157 if skipflags:
157 if skipflags:
158 state[b'skipread'].add(node)
158 state[b'skipread'].add(node)
159 else:
159 else:
160 # Side-effect: read content and verify hash.
160 # Side-effect: read content and verify hash.
161 rl.revision(node)
161 rl.revision(node)
162
162
163
163
164 # True if a fast implementation for persistent-nodemap is available
164 # True if a fast implementation for persistent-nodemap is available
165 #
165 #
166 # We also consider we have a "fast" implementation in "pure" python because
166 # We also consider we have a "fast" implementation in "pure" python because
167 # people using pure don't really have performance considerations (and a
167 # people using pure don't really have performance considerations (and a
168 # wheelbarrow of other slowness sources)
168 # wheelbarrow of other slowness sources)
169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
170 parsers, 'BaseIndexObject'
170 parsers, 'BaseIndexObject'
171 )
171 )
172
172
173
173
174 @interfaceutil.implementer(repository.irevisiondelta)
174 @interfaceutil.implementer(repository.irevisiondelta)
175 @attr.s(slots=True)
175 @attr.s(slots=True)
176 class revlogrevisiondelta:
176 class revlogrevisiondelta:
177 node = attr.ib()
177 node = attr.ib()
178 p1node = attr.ib()
178 p1node = attr.ib()
179 p2node = attr.ib()
179 p2node = attr.ib()
180 basenode = attr.ib()
180 basenode = attr.ib()
181 flags = attr.ib()
181 flags = attr.ib()
182 baserevisionsize = attr.ib()
182 baserevisionsize = attr.ib()
183 revision = attr.ib()
183 revision = attr.ib()
184 delta = attr.ib()
184 delta = attr.ib()
185 sidedata = attr.ib()
185 sidedata = attr.ib()
186 protocol_flags = attr.ib()
186 protocol_flags = attr.ib()
187 linknode = attr.ib(default=None)
187 linknode = attr.ib(default=None)
188
188
189
189
190 @interfaceutil.implementer(repository.iverifyproblem)
190 @interfaceutil.implementer(repository.iverifyproblem)
191 @attr.s(frozen=True)
191 @attr.s(frozen=True)
192 class revlogproblem:
192 class revlogproblem:
193 warning = attr.ib(default=None)
193 warning = attr.ib(default=None)
194 error = attr.ib(default=None)
194 error = attr.ib(default=None)
195 node = attr.ib(default=None)
195 node = attr.ib(default=None)
196
196
197
197
198 def parse_index_v1(data, inline):
198 def parse_index_v1(data, inline):
199 # call the C implementation to parse the index data
199 # call the C implementation to parse the index data
200 index, cache = parsers.parse_index2(data, inline)
200 index, cache = parsers.parse_index2(data, inline)
201 return index, cache
201 return index, cache
202
202
203
203
204 def parse_index_v2(data, inline):
204 def parse_index_v2(data, inline):
205 # call the C implementation to parse the index data
205 # call the C implementation to parse the index data
206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
206 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
207 return index, cache
207 return index, cache
208
208
209
209
210 def parse_index_cl_v2(data, inline):
210 def parse_index_cl_v2(data, inline):
211 # call the C implementation to parse the index data
211 # call the C implementation to parse the index data
212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
212 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
213 return index, cache
213 return index, cache
214
214
215
215
216 if hasattr(parsers, 'parse_index_devel_nodemap'):
216 if hasattr(parsers, 'parse_index_devel_nodemap'):
217
217
218 def parse_index_v1_nodemap(data, inline):
218 def parse_index_v1_nodemap(data, inline):
219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 index, cache = parsers.parse_index_devel_nodemap(data, inline)
220 return index, cache
220 return index, cache
221
221
222
222
223 else:
223 else:
224 parse_index_v1_nodemap = None
224 parse_index_v1_nodemap = None
225
225
226
226
227 def parse_index_v1_mixed(data, inline):
227 def parse_index_v1_mixed(data, inline):
228 index, cache = parse_index_v1(data, inline)
228 index, cache = parse_index_v1(data, inline)
229 return rustrevlog.MixedIndex(index), cache
229 return rustrevlog.MixedIndex(index), cache
230
230
231
231
232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
233 # signed integer)
233 # signed integer)
234 _maxentrysize = 0x7FFFFFFF
234 _maxentrysize = 0x7FFFFFFF
235
235
236 FILE_TOO_SHORT_MSG = _(
236 FILE_TOO_SHORT_MSG = _(
237 b'cannot read from revlog %s;'
237 b'cannot read from revlog %s;'
238 b' expected %d bytes from offset %d, data size is %d'
238 b' expected %d bytes from offset %d, data size is %d'
239 )
239 )
240
240
241 hexdigits = b'0123456789abcdefABCDEF'
241 hexdigits = b'0123456789abcdefABCDEF'
242
242
243
243
244 class revlog:
244 class revlog:
245 """
245 """
246 the underlying revision storage object
246 the underlying revision storage object
247
247
248 A revlog consists of two parts, an index and the revision data.
248 A revlog consists of two parts, an index and the revision data.
249
249
250 The index is a file with a fixed record size containing
250 The index is a file with a fixed record size containing
251 information on each revision, including its nodeid (hash), the
251 information on each revision, including its nodeid (hash), the
252 nodeids of its parents, the position and offset of its data within
252 nodeids of its parents, the position and offset of its data within
253 the data file, and the revision it's based on. Finally, each entry
253 the data file, and the revision it's based on. Finally, each entry
254 contains a linkrev entry that can serve as a pointer to external
254 contains a linkrev entry that can serve as a pointer to external
255 data.
255 data.
256
256
257 The revision data itself is a linear collection of data chunks.
257 The revision data itself is a linear collection of data chunks.
258 Each chunk represents a revision and is usually represented as a
258 Each chunk represents a revision and is usually represented as a
259 delta against the previous chunk. To bound lookup time, runs of
259 delta against the previous chunk. To bound lookup time, runs of
260 deltas are limited to about 2 times the length of the original
260 deltas are limited to about 2 times the length of the original
261 version data. This makes retrieval of a version proportional to
261 version data. This makes retrieval of a version proportional to
262 its size, or O(1) relative to the number of revisions.
262 its size, or O(1) relative to the number of revisions.
263
263
264 Both pieces of the revlog are written to in an append-only
264 Both pieces of the revlog are written to in an append-only
265 fashion, which means we never need to rewrite a file to insert or
265 fashion, which means we never need to rewrite a file to insert or
266 remove data, and can use some simple techniques to avoid the need
266 remove data, and can use some simple techniques to avoid the need
267 for locking while reading.
267 for locking while reading.
268
268
269 If checkambig, indexfile is opened with checkambig=True at
269 If checkambig, indexfile is opened with checkambig=True at
270 writing, to avoid file stat ambiguity.
270 writing, to avoid file stat ambiguity.
271
271
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 index will be mmapped rather than read if it is larger than the
273 index will be mmapped rather than read if it is larger than the
274 configured threshold.
274 configured threshold.
275
275
276 If censorable is True, the revlog can have censored revisions.
276 If censorable is True, the revlog can have censored revisions.
277
277
278 If `upperboundcomp` is not None, this is the expected maximal gain from
278 If `upperboundcomp` is not None, this is the expected maximal gain from
279 compression for the data content.
279 compression for the data content.
280
280
281 `concurrencychecker` is an optional function that receives 3 arguments: a
281 `concurrencychecker` is an optional function that receives 3 arguments: a
282 file handle, a filename, and an expected position. It should check whether
282 file handle, a filename, and an expected position. It should check whether
283 the current position in the file handle is valid, and log/warn/fail (by
283 the current position in the file handle is valid, and log/warn/fail (by
284 raising).
284 raising).
285
285
286 See mercurial/revlogutils/constants.py for details about the content of an
286 See mercurial/revlogutils/constants.py for details about the content of an
287 index entry.
287 index entry.
288 """
288 """
289
289
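
As a rough companion to the index description above, the sketch below
unpacks one RevlogNG (v1) index record with the struct module; the
'>Qiiiiii20s12x' layout and field order are an assumption to double-check
against revlogutils/constants.py rather than settled API.

    import struct

    # RevlogNG (v1) index record: 64 bytes per revision (assumed layout;
    # verify against revlogutils/constants.py)
    INDEX_ENTRY_V1 = struct.Struct('>Qiiiiii20s12x')

    def unpack_v1_entry(index_data, rev):
        """decode one fixed-size record from raw (non-inline) index bytes"""
        (offset_flags, comp_len, uncomp_len, base, link, p1, p2,
         nodeid) = INDEX_ENTRY_V1.unpack_from(index_data,
                                              rev * INDEX_ENTRY_V1.size)
        return {
            'offset': offset_flags >> 16,    # position in the data file
            'flags': offset_flags & 0xFFFF,  # per-revision flags
            'compressed_length': comp_len,
            'uncompressed_length': uncomp_len,
            'delta_base_rev': base,
            'linkrev': link,
            'p1': p1,
            'p2': p2,
            'nodeid': nodeid,                # 20-byte sha1
        }
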
290 _flagserrorclass = error.RevlogError
290 _flagserrorclass = error.RevlogError
291
291
292 @staticmethod
292 @staticmethod
293 def is_inline_index(header_bytes):
293 def is_inline_index(header_bytes):
294 """Determine if a revlog is inline from the initial bytes of the index"""
294 """Determine if a revlog is inline from the initial bytes of the index"""
295 header = INDEX_HEADER.unpack(header_bytes)[0]
295 header = INDEX_HEADER.unpack(header_bytes)[0]
296
296
297 _format_flags = header & ~0xFFFF
297 _format_flags = header & ~0xFFFF
298 _format_version = header & 0xFFFF
298 _format_version = header & 0xFFFF
299
299
300 features = FEATURES_BY_VERSION[_format_version]
300 features = FEATURES_BY_VERSION[_format_version]
301 return features[b'inline'](_format_flags)
301 return features[b'inline'](_format_flags)
302
302
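
A hedged usage sketch for is_inline_index: only the first four bytes of
an index file are needed (the filename below is hypothetical).

    # '00changelog.i' is a hypothetical path; any revlog index works
    with open('00changelog.i', 'rb') as fp:
        header_bytes = fp.read(4)
    print(revlog.is_inline_index(header_bytes))
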
303 def __init__(
303 def __init__(
304 self,
304 self,
305 opener,
305 opener,
306 target,
306 target,
307 radix,
307 radix,
308 postfix=None, # only exist for `tmpcensored` now
308 postfix=None, # only exist for `tmpcensored` now
309 checkambig=False,
309 checkambig=False,
310 mmaplargeindex=False,
310 mmaplargeindex=False,
311 censorable=False,
311 censorable=False,
312 upperboundcomp=None,
312 upperboundcomp=None,
313 persistentnodemap=False,
313 persistentnodemap=False,
314 concurrencychecker=None,
314 concurrencychecker=None,
315 trypending=False,
315 trypending=False,
316 try_split=False,
316 try_split=False,
317 canonical_parent_order=True,
317 canonical_parent_order=True,
318 ):
318 ):
319 """
319 """
320 create a revlog object
320 create a revlog object
321
321
322 opener is a function that abstracts the file opening operation
322 opener is a function that abstracts the file opening operation
323 and can be used to implement COW semantics or the like.
323 and can be used to implement COW semantics or the like.
324
324
325 `target`: a (KIND, ID) tuple that identifies the content stored in
325 `target`: a (KIND, ID) tuple that identifies the content stored in
326 this revlog. It helps the rest of the code to understand what the revlog
326 this revlog. It helps the rest of the code to understand what the revlog
327 is about without having to resort to heuristics and index filename
327 is about without having to resort to heuristics and index filename
328 analysis. Note that this must reliably be set by normal code, but
328 analysis. Note that this must reliably be set by normal code, but
329 that test, debug, or performance measurement code might not set it to
329 that test, debug, or performance measurement code might not set it to
330 an accurate value.
330 an accurate value.
331 """
331 """
332 self.upperboundcomp = upperboundcomp
332 self.upperboundcomp = upperboundcomp
333
333
334 self.radix = radix
334 self.radix = radix
335
335
336 self._docket_file = None
336 self._docket_file = None
337 self._indexfile = None
337 self._indexfile = None
338 self._datafile = None
338 self._datafile = None
339 self._sidedatafile = None
339 self._sidedatafile = None
340 self._nodemap_file = None
340 self._nodemap_file = None
341 self.postfix = postfix
341 self.postfix = postfix
342 self._trypending = trypending
342 self._trypending = trypending
343 self._try_split = try_split
343 self._try_split = try_split
344 self.opener = opener
344 self.opener = opener
345 if persistentnodemap:
345 if persistentnodemap:
346 self._nodemap_file = nodemaputil.get_nodemap_file(self)
346 self._nodemap_file = nodemaputil.get_nodemap_file(self)
347
347
348 assert target[0] in ALL_KINDS
348 assert target[0] in ALL_KINDS
349 assert len(target) == 2
349 assert len(target) == 2
350 self.target = target
350 self.target = target
351 # When True, indexfile is opened with checkambig=True at writing, to
351 # When True, indexfile is opened with checkambig=True at writing, to
352 # avoid file stat ambiguity.
352 # avoid file stat ambiguity.
353 self._checkambig = checkambig
353 self._checkambig = checkambig
354 self._mmaplargeindex = mmaplargeindex
354 self._mmaplargeindex = mmaplargeindex
355 self._censorable = censorable
355 self._censorable = censorable
356 # 3-tuple of (node, rev, text) for a raw revision.
356 # 3-tuple of (node, rev, text) for a raw revision.
357 self._revisioncache = None
357 self._revisioncache = None
358 # Maps rev to chain base rev.
358 # Maps rev to chain base rev.
359 self._chainbasecache = util.lrucachedict(100)
359 self._chainbasecache = util.lrucachedict(100)
360 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
360 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 self._chunkcache = (0, b'')
361 self._chunkcache = (0, b'')
362 # How much data to read and cache into the raw revlog data cache.
362 # How much data to read and cache into the raw revlog data cache.
363 self._chunkcachesize = 65536
363 self._chunkcachesize = 65536
364 self._maxchainlen = None
364 self._maxchainlen = None
365 self._deltabothparents = True
365 self._deltabothparents = True
366 self._candidate_group_chunk_size = 0
366 self._candidate_group_chunk_size = 0
367 self._debug_delta = False
367 self._debug_delta = False
368 self.index = None
368 self.index = None
369 self._docket = None
369 self._docket = None
370 self._nodemap_docket = None
370 self._nodemap_docket = None
371 # Mapping of partial identifiers to full nodes.
371 # Mapping of partial identifiers to full nodes.
372 self._pcache = {}
372 self._pcache = {}
373 # Mapping of revision integer to full node.
373 # Mapping of revision integer to full node.
374 self._compengine = b'zlib'
374 self._compengine = b'zlib'
375 self._compengineopts = {}
375 self._compengineopts = {}
376 self._maxdeltachainspan = -1
376 self._maxdeltachainspan = -1
377 self._withsparseread = False
377 self._withsparseread = False
378 self._sparserevlog = False
378 self._sparserevlog = False
379 self.hassidedata = False
379 self.hassidedata = False
380 self._srdensitythreshold = 0.50
380 self._srdensitythreshold = 0.50
381 self._srmingapsize = 262144
381 self._srmingapsize = 262144
382
382
383 # other optional features
383 # other optional features
384
384
385 # might remove rank configuration once the computation has no impact
385 # might remove rank configuration once the computation has no impact
386 self._compute_rank = False
386 self._compute_rank = False
387
387
388 # Make copy of flag processors so each revlog instance can support
388 # Make copy of flag processors so each revlog instance can support
389 # custom flags.
389 # custom flags.
390 self._flagprocessors = dict(flagutil.flagprocessors)
390 self._flagprocessors = dict(flagutil.flagprocessors)
391
391
392 # 3-tuple of file handles being used for active writing.
392 # 3-tuple of file handles being used for active writing.
393 self._writinghandles = None
393 self._writinghandles = None
394 # prevent nesting of addgroup
394 # prevent nesting of addgroup
395 self._adding_group = None
395 self._adding_group = None
396
396
397 self._loadindex()
397 self._loadindex()
398
398
399 self._concurrencychecker = concurrencychecker
399 self._concurrencychecker = concurrencychecker
400
400
401 # parent order is supposed to be semantically irrelevant, so we
401 # parent order is supposed to be semantically irrelevant, so we
402 # normally re-sort parents to ensure that the first parent is non-null,
402 # normally re-sort parents to ensure that the first parent is non-null,
403 # if there is a non-null parent at all.
403 # if there is a non-null parent at all.
404 # filelog abuses the parent order as a flag to mark some instances of
404 # filelog abuses the parent order as a flag to mark some instances of
405 # meta-encoded files, so allow it to disable this behavior.
405 # meta-encoded files, so allow it to disable this behavior.
406 self.canonical_parent_order = canonical_parent_order
406 self.canonical_parent_order = canonical_parent_order
407
407
408 def _init_opts(self):
408 def _init_opts(self):
409 """process options (from above/config) to setup associated default revlog mode
409 """process options (from above/config) to setup associated default revlog mode
410
410
411 These values might be affected when actually reading on-disk information.
411 These values might be affected when actually reading on-disk information.
412
412
413 The relevant values are returned for use in _loadindex().
413 The relevant values are returned for use in _loadindex().
414
414
415 * newversionflags:
415 * newversionflags:
416 version header to use if we need to create a new revlog
416 version header to use if we need to create a new revlog
417
417
418 * mmapindexthreshold:
418 * mmapindexthreshold:
419 minimal index size at which to start using mmap
419 minimal index size at which to start using mmap
420
420
421 * force_nodemap:
421 * force_nodemap:
422 force the usage of a "development" version of the nodemap code
422 force the usage of a "development" version of the nodemap code
423 """
423 """
424 mmapindexthreshold = None
424 mmapindexthreshold = None
425 opts = self.opener.options
425 opts = self.opener.options
426
426
427 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
427 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
428 new_header = CHANGELOGV2
428 new_header = CHANGELOGV2
429 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
429 self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
430 elif b'revlogv2' in opts:
430 elif b'revlogv2' in opts:
431 new_header = REVLOGV2
431 new_header = REVLOGV2
432 elif b'revlogv1' in opts:
432 elif b'revlogv1' in opts:
433 new_header = REVLOGV1 | FLAG_INLINE_DATA
433 new_header = REVLOGV1 | FLAG_INLINE_DATA
434 if b'generaldelta' in opts:
434 if b'generaldelta' in opts:
435 new_header |= FLAG_GENERALDELTA
435 new_header |= FLAG_GENERALDELTA
436 elif b'revlogv0' in self.opener.options:
436 elif b'revlogv0' in self.opener.options:
437 new_header = REVLOGV0
437 new_header = REVLOGV0
438 else:
438 else:
439 new_header = REVLOG_DEFAULT_VERSION
439 new_header = REVLOG_DEFAULT_VERSION
440
440
441 if b'chunkcachesize' in opts:
441 if b'chunkcachesize' in opts:
442 self._chunkcachesize = opts[b'chunkcachesize']
442 self._chunkcachesize = opts[b'chunkcachesize']
443 if b'maxchainlen' in opts:
443 if b'maxchainlen' in opts:
444 self._maxchainlen = opts[b'maxchainlen']
444 self._maxchainlen = opts[b'maxchainlen']
445 if b'deltabothparents' in opts:
445 if b'deltabothparents' in opts:
446 self._deltabothparents = opts[b'deltabothparents']
446 self._deltabothparents = opts[b'deltabothparents']
447 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
447 dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
448 if dps_cgds:
448 if dps_cgds:
449 self._candidate_group_chunk_size = dps_cgds
449 self._candidate_group_chunk_size = dps_cgds
450 self._lazydelta = bool(opts.get(b'lazydelta', True))
450 self._lazydelta = bool(opts.get(b'lazydelta', True))
451 self._lazydeltabase = False
451 self._lazydeltabase = False
452 if self._lazydelta:
452 if self._lazydelta:
453 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
453 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
454 if b'debug-delta' in opts:
454 if b'debug-delta' in opts:
455 self._debug_delta = opts[b'debug-delta']
455 self._debug_delta = opts[b'debug-delta']
456 if b'compengine' in opts:
456 if b'compengine' in opts:
457 self._compengine = opts[b'compengine']
457 self._compengine = opts[b'compengine']
458 if b'zlib.level' in opts:
458 if b'zlib.level' in opts:
459 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
459 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
460 if b'zstd.level' in opts:
460 if b'zstd.level' in opts:
461 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
461 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
462 if b'maxdeltachainspan' in opts:
462 if b'maxdeltachainspan' in opts:
463 self._maxdeltachainspan = opts[b'maxdeltachainspan']
463 self._maxdeltachainspan = opts[b'maxdeltachainspan']
464 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
464 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
465 mmapindexthreshold = opts[b'mmapindexthreshold']
465 mmapindexthreshold = opts[b'mmapindexthreshold']
466 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
466 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
467 withsparseread = bool(opts.get(b'with-sparse-read', False))
467 withsparseread = bool(opts.get(b'with-sparse-read', False))
468 # sparse-revlog forces sparse-read
468 # sparse-revlog forces sparse-read
469 self._withsparseread = self._sparserevlog or withsparseread
469 self._withsparseread = self._sparserevlog or withsparseread
470 if b'sparse-read-density-threshold' in opts:
470 if b'sparse-read-density-threshold' in opts:
471 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
471 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
472 if b'sparse-read-min-gap-size' in opts:
472 if b'sparse-read-min-gap-size' in opts:
473 self._srmingapsize = opts[b'sparse-read-min-gap-size']
473 self._srmingapsize = opts[b'sparse-read-min-gap-size']
474 if opts.get(b'enableellipsis'):
474 if opts.get(b'enableellipsis'):
475 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
475 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
476
476
477 # revlog v0 doesn't have flag processors
477 # revlog v0 doesn't have flag processors
478 for flag, processor in opts.get(b'flagprocessors', {}).items():
478 for flag, processor in opts.get(b'flagprocessors', {}).items():
479 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
479 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
480
480
481 if self._chunkcachesize <= 0:
481 if self._chunkcachesize <= 0:
482 raise error.RevlogError(
482 raise error.RevlogError(
483 _(b'revlog chunk cache size %r is not greater than 0')
483 _(b'revlog chunk cache size %r is not greater than 0')
484 % self._chunkcachesize
484 % self._chunkcachesize
485 )
485 )
486 elif self._chunkcachesize & (self._chunkcachesize - 1):
486 elif self._chunkcachesize & (self._chunkcachesize - 1):
487 raise error.RevlogError(
487 raise error.RevlogError(
488 _(b'revlog chunk cache size %r is not a power of 2')
488 _(b'revlog chunk cache size %r is not a power of 2')
489 % self._chunkcachesize
489 % self._chunkcachesize
490 )
490 )
491 force_nodemap = opts.get(b'devel-force-nodemap', False)
491 force_nodemap = opts.get(b'devel-force-nodemap', False)
492 return new_header, mmapindexthreshold, force_nodemap
492 return new_header, mmapindexthreshold, force_nodemap
493
493
494 def _get_data(self, filepath, mmap_threshold, size=None):
494 def _get_data(self, filepath, mmap_threshold, size=None):
495 """return a file content with or without mmap
495 """return a file content with or without mmap
496
496
497 If the file is missing return the empty string"""
497 If the file is missing return the empty string"""
498 try:
498 try:
499 with self.opener(filepath) as fp:
499 with self.opener(filepath) as fp:
500 if mmap_threshold is not None:
500 if mmap_threshold is not None:
501 file_size = self.opener.fstat(fp).st_size
501 file_size = self.opener.fstat(fp).st_size
502 if file_size >= mmap_threshold:
502 if file_size >= mmap_threshold:
503 if size is not None:
503 if size is not None:
504 # avoid potential mmap crash
504 # avoid potential mmap crash
505 size = min(file_size, size)
505 size = min(file_size, size)
506 # TODO: should .close() to release resources without
506 # TODO: should .close() to release resources without
507 # relying on Python GC
507 # relying on Python GC
508 if size is None:
508 if size is None:
509 return util.buffer(util.mmapread(fp))
509 return util.buffer(util.mmapread(fp))
510 else:
510 else:
511 return util.buffer(util.mmapread(fp, size))
511 return util.buffer(util.mmapread(fp, size))
512 if size is None:
512 if size is None:
513 return fp.read()
513 return fp.read()
514 else:
514 else:
515 return fp.read(size)
515 return fp.read(size)
516 except FileNotFoundError:
516 except FileNotFoundError:
517 return b''
517 return b''
518
518
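
For readers outside the codebase, here is a self-contained sketch of the
mmap-above-threshold read pattern that _get_data implements, assuming a
plain filesystem path instead of a Mercurial opener:

    import mmap
    import os

    def read_with_mmap_threshold(path, mmap_threshold=None):
        """return file content, memory-mapping it when large enough"""
        try:
            with open(path, 'rb') as fp:
                if mmap_threshold is not None:
                    if os.fstat(fp.fileno()).st_size >= mmap_threshold:
                        # zero-copy view of the file (released on GC, as the
                        # TODO above notes for the real implementation)
                        return mmap.mmap(fp.fileno(), 0,
                                         access=mmap.ACCESS_READ)
                return fp.read()
        except FileNotFoundError:
            return b''
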
519 def get_streams(self, max_linkrev, force_inline=False):
519 def get_streams(self, max_linkrev, force_inline=False):
520 """return a list of streams that represent this revlog
520 """return a list of streams that represent this revlog
521
521
522 This is used by stream-clone to do bytes-to-bytes copies of a repository.
522 This is used by stream-clone to do bytes-to-bytes copies of a repository.
523
523
524 This streams data for all revisions that refer to a changelog revision up
524 This streams data for all revisions that refer to a changelog revision up
525 to `max_linkrev`.
525 to `max_linkrev`.
526
526
527 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
527 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
528
528
529 It returns a list of three-tuples:
529 It returns a list of three-tuples:
530
530
531 [
531 [
532 (filename, bytes_stream, stream_size),
532 (filename, bytes_stream, stream_size),
533 …
533 …
534 ]
534 ]
535 """
535 """
536 n = len(self)
536 n = len(self)
537 index = self.index
537 index = self.index
538 while n > 0:
538 while n > 0:
539 linkrev = index[n - 1][4]
539 linkrev = index[n - 1][4]
540 if linkrev < max_linkrev:
540 if linkrev < max_linkrev:
541 break
541 break
542 # note: this loop will rarely go through multiple iterations, since
542 # note: this loop will rarely go through multiple iterations, since
543 # it only traverses commits created during the current streaming
543 # it only traverses commits created during the current streaming
544 # pull operation.
544 # pull operation.
545 #
545 #
546 # If this becomes a problem, using a binary search should cap the
546 # If this becomes a problem, using a binary search should cap the
547 # runtime of this.
547 # runtime of this.
548 n = n - 1
548 n = n - 1
549 if n == 0:
549 if n == 0:
550 # no data to send
550 # no data to send
551 return []
551 return []
552 index_size = n * index.entry_size
552 index_size = n * index.entry_size
553 data_size = self.end(n - 1)
553 data_size = self.end(n - 1)
554
554
555 # XXX we might have been split (or stripped) since the object
555 # XXX we might have been split (or stripped) since the object
556 # initialization. We need to close this race too, by having a way to
556 # initialization. We need to close this race too, by having a way to
557 # pre-open the files we feed to the revlog and never closing them before
557 # pre-open the files we feed to the revlog and never closing them before
558 # we are done streaming.
558 # we are done streaming.
559
559
560 if self._inline:
560 if self._inline:
561
561
562 def get_stream():
562 def get_stream():
563 with self._indexfp() as fp:
563 with self._indexfp() as fp:
564 yield None
564 yield None
565 size = index_size + data_size
565 size = index_size + data_size
566 if size <= 65536:
566 if size <= 65536:
567 yield fp.read(size)
567 yield fp.read(size)
568 else:
568 else:
569 yield from util.filechunkiter(fp, limit=size)
569 yield from util.filechunkiter(fp, limit=size)
570
570
571 inline_stream = get_stream()
571 inline_stream = get_stream()
572 next(inline_stream)
572 next(inline_stream)
573 return [
573 return [
574 (self._indexfile, inline_stream, index_size + data_size),
574 (self._indexfile, inline_stream, index_size + data_size),
575 ]
575 ]
576 elif force_inline:
576 elif force_inline:
577
577
578 def get_stream():
578 def get_stream():
579 with self.reading():
579 with self.reading():
580 yield None
580 yield None
581
581
582 for rev in range(n):
582 for rev in range(n):
583 idx = self.index.entry_binary(rev)
583 idx = self.index.entry_binary(rev)
584 if rev == 0 and self._docket is None:
584 if rev == 0 and self._docket is None:
585 # re-inject the inline flag
585 # re-inject the inline flag
586 header = self._format_flags
586 header = self._format_flags
587 header |= self._format_version
587 header |= self._format_version
588 header |= FLAG_INLINE_DATA
588 header |= FLAG_INLINE_DATA
589 header = self.index.pack_header(header)
589 header = self.index.pack_header(header)
590 idx = header + idx
590 idx = header + idx
591 yield idx
591 yield idx
592 yield self._getsegmentforrevs(rev, rev)[1]
592 yield self._getsegmentforrevs(rev, rev)[1]
593
593
594 inline_stream = get_stream()
594 inline_stream = get_stream()
595 next(inline_stream)
595 next(inline_stream)
596 return [
596 return [
597 (self._indexfile, inline_stream, index_size + data_size),
597 (self._indexfile, inline_stream, index_size + data_size),
598 ]
598 ]
599 else:
599 else:
600
600
601 def get_index_stream():
601 def get_index_stream():
602 with self._indexfp() as fp:
602 with self._indexfp() as fp:
603 yield None
603 yield None
604 if index_size <= 65536:
604 if index_size <= 65536:
605 yield fp.read(index_size)
605 yield fp.read(index_size)
606 else:
606 else:
607 yield from util.filechunkiter(fp, limit=index_size)
607 yield from util.filechunkiter(fp, limit=index_size)
608
608
609 def get_data_stream():
609 def get_data_stream():
610 with self._datafp() as fp:
610 with self._datafp() as fp:
611 yield None
611 yield None
612 if data_size <= 65536:
612 if data_size <= 65536:
613 yield fp.read(data_size)
613 yield fp.read(data_size)
614 else:
614 else:
615 yield from util.filechunkiter(fp, limit=data_size)
615 yield from util.filechunkiter(fp, limit=data_size)
616
616
617 index_stream = get_index_stream()
617 index_stream = get_index_stream()
618 next(index_stream)
618 next(index_stream)
619 data_stream = get_data_stream()
619 data_stream = get_data_stream()
620 next(data_stream)
620 next(data_stream)
621 return [
621 return [
622 (self._datafile, data_stream, data_size),
622 (self._datafile, data_stream, data_size),
623 (self._indexfile, index_stream, index_size),
623 (self._indexfile, index_stream, index_size),
624 ]
624 ]
625
625
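
A hedged sketch of the consumer side: a stream-clone writer might drain
the documented (filename, bytes_stream, stream_size) tuples like this,
where rl and dest_dir are assumptions, not Mercurial API:

    import os

    def copy_revlog_streams(rl, dest_dir, max_linkrev):
        # rl is assumed to be a revlog; each entry is the documented
        # (filename, bytes_stream, stream_size) three-tuple
        for filename, stream, size in rl.get_streams(max_linkrev):
            written = 0
            dest = os.path.join(dest_dir, os.fsdecode(filename))
            with open(dest, 'wb') as fp:
                for chunk in stream:
                    fp.write(chunk)
                    written += len(chunk)
            assert written == size, 'short stream for %r' % filename
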
626 def _loadindex(self, docket=None):
626 def _loadindex(self, docket=None):
627
627
628 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
628 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
629
629
630 if self.postfix is not None:
630 if self.postfix is not None:
631 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
631 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
632 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
632 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
633 entry_point = b'%s.i.a' % self.radix
633 entry_point = b'%s.i.a' % self.radix
634 elif self._try_split and self.opener.exists(self._split_index_file):
634 elif self._try_split and self.opener.exists(self._split_index_file):
635 entry_point = self._split_index_file
635 entry_point = self._split_index_file
636 else:
636 else:
637 entry_point = b'%s.i' % self.radix
637 entry_point = b'%s.i' % self.radix
638
638
639 if docket is not None:
639 if docket is not None:
640 self._docket = docket
640 self._docket = docket
641 self._docket_file = entry_point
641 self._docket_file = entry_point
642 else:
642 else:
643 self._initempty = True
643 self._initempty = True
644 entry_data = self._get_data(entry_point, mmapindexthreshold)
644 entry_data = self._get_data(entry_point, mmapindexthreshold)
645 if len(entry_data) > 0:
645 if len(entry_data) > 0:
646 header = INDEX_HEADER.unpack(entry_data[:4])[0]
646 header = INDEX_HEADER.unpack(entry_data[:4])[0]
647 self._initempty = False
647 self._initempty = False
648 else:
648 else:
649 header = new_header
649 header = new_header
650
650
651 self._format_flags = header & ~0xFFFF
651 self._format_flags = header & ~0xFFFF
652 self._format_version = header & 0xFFFF
652 self._format_version = header & 0xFFFF
653
653
654 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
654 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
655 if supported_flags is None:
655 if supported_flags is None:
656 msg = _(b'unknown version (%d) in revlog %s')
656 msg = _(b'unknown version (%d) in revlog %s')
657 msg %= (self._format_version, self.display_id)
657 msg %= (self._format_version, self.display_id)
658 raise error.RevlogError(msg)
658 raise error.RevlogError(msg)
659 elif self._format_flags & ~supported_flags:
659 elif self._format_flags & ~supported_flags:
660 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
660 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
661 display_flag = self._format_flags >> 16
661 display_flag = self._format_flags >> 16
662 msg %= (display_flag, self._format_version, self.display_id)
662 msg %= (display_flag, self._format_version, self.display_id)
663 raise error.RevlogError(msg)
663 raise error.RevlogError(msg)
664
664
665 features = FEATURES_BY_VERSION[self._format_version]
665 features = FEATURES_BY_VERSION[self._format_version]
666 self._inline = features[b'inline'](self._format_flags)
666 self._inline = features[b'inline'](self._format_flags)
667 self._generaldelta = features[b'generaldelta'](self._format_flags)
667 self._generaldelta = features[b'generaldelta'](self._format_flags)
668 self.hassidedata = features[b'sidedata']
668 self.hassidedata = features[b'sidedata']
669
669
670 if not features[b'docket']:
670 if not features[b'docket']:
671 self._indexfile = entry_point
671 self._indexfile = entry_point
672 index_data = entry_data
672 index_data = entry_data
673 else:
673 else:
674 self._docket_file = entry_point
674 self._docket_file = entry_point
675 if self._initempty:
675 if self._initempty:
676 self._docket = docketutil.default_docket(self, header)
676 self._docket = docketutil.default_docket(self, header)
677 else:
677 else:
678 self._docket = docketutil.parse_docket(
678 self._docket = docketutil.parse_docket(
679 self, entry_data, use_pending=self._trypending
679 self, entry_data, use_pending=self._trypending
680 )
680 )
681
681
682 if self._docket is not None:
682 if self._docket is not None:
683 self._indexfile = self._docket.index_filepath()
683 self._indexfile = self._docket.index_filepath()
684 index_data = b''
684 index_data = b''
685 index_size = self._docket.index_end
685 index_size = self._docket.index_end
686 if index_size > 0:
686 if index_size > 0:
687 index_data = self._get_data(
687 index_data = self._get_data(
688 self._indexfile, mmapindexthreshold, size=index_size
688 self._indexfile, mmapindexthreshold, size=index_size
689 )
689 )
690 if len(index_data) < index_size:
690 if len(index_data) < index_size:
691 msg = _(b'too few index data for %s: got %d, expected %d')
691 msg = _(b'too few index data for %s: got %d, expected %d')
692 msg %= (self.display_id, len(index_data), index_size)
692 msg %= (self.display_id, len(index_data), index_size)
693 raise error.RevlogError(msg)
693 raise error.RevlogError(msg)
694
694
695 self._inline = False
695 self._inline = False
696 # generaldelta implied by version 2 revlogs.
696 # generaldelta implied by version 2 revlogs.
697 self._generaldelta = True
697 self._generaldelta = True
698 # the logic for persistent nodemap will be dealt with within the
698 # the logic for persistent nodemap will be dealt with within the
699 # main docket, so disable it for now.
699 # main docket, so disable it for now.
700 self._nodemap_file = None
700 self._nodemap_file = None
701
701
702 if self._docket is not None:
702 if self._docket is not None:
703 self._datafile = self._docket.data_filepath()
703 self._datafile = self._docket.data_filepath()
704 self._sidedatafile = self._docket.sidedata_filepath()
704 self._sidedatafile = self._docket.sidedata_filepath()
705 elif self.postfix is None:
705 elif self.postfix is None:
706 self._datafile = b'%s.d' % self.radix
706 self._datafile = b'%s.d' % self.radix
707 else:
707 else:
708 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
708 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
709
709
710 self.nodeconstants = sha1nodeconstants
710 self.nodeconstants = sha1nodeconstants
711 self.nullid = self.nodeconstants.nullid
711 self.nullid = self.nodeconstants.nullid
712
712
713 # sparse-revlog can't be on without general-delta (issue6056)
713 # sparse-revlog can't be on without general-delta (issue6056)
714 if not self._generaldelta:
714 if not self._generaldelta:
715 self._sparserevlog = False
715 self._sparserevlog = False
716
716
717 self._storedeltachains = True
717 self._storedeltachains = True
718
718
719 devel_nodemap = (
719 devel_nodemap = (
720 self._nodemap_file
720 self._nodemap_file
721 and force_nodemap
721 and force_nodemap
722 and parse_index_v1_nodemap is not None
722 and parse_index_v1_nodemap is not None
723 )
723 )
724
724
725 use_rust_index = False
725 use_rust_index = False
726 if rustrevlog is not None:
726 if rustrevlog is not None:
727 if self._nodemap_file is not None:
727 if self._nodemap_file is not None:
728 use_rust_index = True
728 use_rust_index = True
729 else:
729 else:
730 use_rust_index = self.opener.options.get(b'rust.index')
730 use_rust_index = self.opener.options.get(b'rust.index')
731
731
732 self._parse_index = parse_index_v1
732 self._parse_index = parse_index_v1
733 if self._format_version == REVLOGV0:
733 if self._format_version == REVLOGV0:
734 self._parse_index = revlogv0.parse_index_v0
734 self._parse_index = revlogv0.parse_index_v0
735 elif self._format_version == REVLOGV2:
735 elif self._format_version == REVLOGV2:
736 self._parse_index = parse_index_v2
736 self._parse_index = parse_index_v2
737 elif self._format_version == CHANGELOGV2:
737 elif self._format_version == CHANGELOGV2:
738 self._parse_index = parse_index_cl_v2
738 self._parse_index = parse_index_cl_v2
739 elif devel_nodemap:
739 elif devel_nodemap:
740 self._parse_index = parse_index_v1_nodemap
740 self._parse_index = parse_index_v1_nodemap
741 elif use_rust_index:
741 elif use_rust_index:
742 self._parse_index = parse_index_v1_mixed
742 self._parse_index = parse_index_v1_mixed
743 try:
743 try:
744 d = self._parse_index(index_data, self._inline)
744 d = self._parse_index(index_data, self._inline)
745 index, chunkcache = d
745 index, chunkcache = d
746 use_nodemap = (
746 use_nodemap = (
747 not self._inline
747 not self._inline
748 and self._nodemap_file is not None
748 and self._nodemap_file is not None
749 and hasattr(index, 'update_nodemap_data')
749 and hasattr(index, 'update_nodemap_data')
750 )
750 )
751 if use_nodemap:
751 if use_nodemap:
752 nodemap_data = nodemaputil.persisted_data(self)
752 nodemap_data = nodemaputil.persisted_data(self)
753 if nodemap_data is not None:
753 if nodemap_data is not None:
754 docket = nodemap_data[0]
754 docket = nodemap_data[0]
755 if (
755 if (
756 len(d[0]) > docket.tip_rev
756 len(d[0]) > docket.tip_rev
757 and d[0][docket.tip_rev][7] == docket.tip_node
757 and d[0][docket.tip_rev][7] == docket.tip_node
758 ):
758 ):
759 # no changelog tampering
759 # no changelog tampering
760 self._nodemap_docket = docket
760 self._nodemap_docket = docket
761 index.update_nodemap_data(*nodemap_data)
761 index.update_nodemap_data(*nodemap_data)
762 except (ValueError, IndexError):
762 except (ValueError, IndexError):
763 raise error.RevlogError(
763 raise error.RevlogError(
764 _(b"index %s is corrupted") % self.display_id
764 _(b"index %s is corrupted") % self.display_id
765 )
765 )
766 self.index = index
766 self.index = index
767 self._segmentfile = randomaccessfile.randomaccessfile(
767 self._segmentfile = randomaccessfile.randomaccessfile(
768 self.opener,
768 self.opener,
769 (self._indexfile if self._inline else self._datafile),
769 (self._indexfile if self._inline else self._datafile),
770 self._chunkcachesize,
770 self._chunkcachesize,
771 chunkcache,
771 chunkcache,
772 )
772 )
773 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
773 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
774 self.opener,
774 self.opener,
775 self._sidedatafile,
775 self._sidedatafile,
776 self._chunkcachesize,
776 self._chunkcachesize,
777 )
777 )
778 # revnum -> (chain-length, sum-delta-length)
778 # revnum -> (chain-length, sum-delta-length)
779 self._chaininfocache = util.lrucachedict(500)
779 self._chaininfocache = util.lrucachedict(500)
780 # revlog header -> revlog compressor
780 # revlog header -> revlog compressor
781 self._decompressors = {}
781 self._decompressors = {}
782
782
783 def get_revlog(self):
783 def get_revlog(self):
784 """simple function to mirror API of other not-really-revlog API"""
784 """simple function to mirror API of other not-really-revlog API"""
785 return self
785 return self
786
786
787 @util.propertycache
787 @util.propertycache
788 def revlog_kind(self):
788 def revlog_kind(self):
789 return self.target[0]
789 return self.target[0]
790
790
791 @util.propertycache
791 @util.propertycache
792 def display_id(self):
792 def display_id(self):
793 """The public facing "ID" of the revlog that we use in message"""
793 """The public facing "ID" of the revlog that we use in message"""
794 if self.revlog_kind == KIND_FILELOG:
794 if self.revlog_kind == KIND_FILELOG:
795 # Reference the file without the "data/" prefix, so it is familiar
795 # Reference the file without the "data/" prefix, so it is familiar
796 # to the user.
796 # to the user.
797 return self.target[1]
797 return self.target[1]
798 else:
798 else:
799 return self.radix
799 return self.radix
800
800
801 def _get_decompressor(self, t):
801 def _get_decompressor(self, t):
802 try:
802 try:
803 compressor = self._decompressors[t]
803 compressor = self._decompressors[t]
804 except KeyError:
804 except KeyError:
805 try:
805 try:
806 engine = util.compengines.forrevlogheader(t)
806 engine = util.compengines.forrevlogheader(t)
807 compressor = engine.revlogcompressor(self._compengineopts)
807 compressor = engine.revlogcompressor(self._compengineopts)
808 self._decompressors[t] = compressor
808 self._decompressors[t] = compressor
809 except KeyError:
809 except KeyError:
810 raise error.RevlogError(
810 raise error.RevlogError(
811 _(b'unknown compression type %s') % binascii.hexlify(t)
811 _(b'unknown compression type %s') % binascii.hexlify(t)
812 )
812 )
813 return compressor
813 return compressor
814
814
815 @util.propertycache
815 @util.propertycache
816 def _compressor(self):
816 def _compressor(self):
817 engine = util.compengines[self._compengine]
817 engine = util.compengines[self._compengine]
818 return engine.revlogcompressor(self._compengineopts)
818 return engine.revlogcompressor(self._compengineopts)
819
819
820 @util.propertycache
820 @util.propertycache
821 def _decompressor(self):
821 def _decompressor(self):
822 """the default decompressor"""
822 """the default decompressor"""
823 if self._docket is None:
823 if self._docket is None:
824 return None
824 return None
825 t = self._docket.default_compression_header
825 t = self._docket.default_compression_header
826 c = self._get_decompressor(t)
826 c = self._get_decompressor(t)
827 return c.decompress
827 return c.decompress
828
828
829 def _indexfp(self):
829 def _indexfp(self):
830 """file object for the revlog's index file"""
830 """file object for the revlog's index file"""
831 return self.opener(self._indexfile, mode=b"r")
831 return self.opener(self._indexfile, mode=b"r")
832
832
833 def __index_write_fp(self):
833 def __index_write_fp(self):
834 # You should not use this directly; use `_writing` instead
834 # You should not use this directly; use `_writing` instead
835 try:
835 try:
836 f = self.opener(
836 f = self.opener(
837 self._indexfile, mode=b"r+", checkambig=self._checkambig
837 self._indexfile, mode=b"r+", checkambig=self._checkambig
838 )
838 )
839 if self._docket is None:
839 if self._docket is None:
840 f.seek(0, os.SEEK_END)
840 f.seek(0, os.SEEK_END)
841 else:
841 else:
842 f.seek(self._docket.index_end, os.SEEK_SET)
842 f.seek(self._docket.index_end, os.SEEK_SET)
843 return f
843 return f
844 except FileNotFoundError:
844 except FileNotFoundError:
845 return self.opener(
845 return self.opener(
846 self._indexfile, mode=b"w+", checkambig=self._checkambig
846 self._indexfile, mode=b"w+", checkambig=self._checkambig
847 )
847 )
848
848
849 def __index_new_fp(self):
849 def __index_new_fp(self):
850 # You should not use this unless you are upgrading from an inline revlog
850 # You should not use this unless you are upgrading from an inline revlog
851 return self.opener(
851 return self.opener(
852 self._indexfile,
852 self._indexfile,
853 mode=b"w",
853 mode=b"w",
854 checkambig=self._checkambig,
854 checkambig=self._checkambig,
855 atomictemp=True,
855 atomictemp=True,
856 )
856 )
857
857
858 def _datafp(self, mode=b'r'):
858 def _datafp(self, mode=b'r'):
859 """file object for the revlog's data file"""
859 """file object for the revlog's data file"""
860 return self.opener(self._datafile, mode=mode)
860 return self.opener(self._datafile, mode=mode)
861
861
862 @contextlib.contextmanager
862 @contextlib.contextmanager
863 def _sidedatareadfp(self):
863 def _sidedatareadfp(self):
864 """file object suitable to read sidedata"""
864 """file object suitable to read sidedata"""
865 if self._writinghandles:
865 if self._writinghandles:
866 yield self._writinghandles[2]
866 yield self._writinghandles[2]
867 else:
867 else:
868 with self.opener(self._sidedatafile) as fp:
868 with self.opener(self._sidedatafile) as fp:
869 yield fp
869 yield fp
870
870
871 def tiprev(self):
871 def tiprev(self):
872 return len(self.index) - 1
872 return len(self.index) - 1
873
873
874 def tip(self):
874 def tip(self):
875 return self.node(self.tiprev())
875 return self.node(self.tiprev())
876
876
877 def __contains__(self, rev):
877 def __contains__(self, rev):
878 return 0 <= rev < len(self)
878 return 0 <= rev < len(self)
879
879
880 def __len__(self):
880 def __len__(self):
881 return len(self.index)
881 return len(self.index)
882
882
883 def __iter__(self):
883 def __iter__(self):
884 return iter(range(len(self)))
884 return iter(range(len(self)))
885
885
886 def revs(self, start=0, stop=None):
886 def revs(self, start=0, stop=None):
887 """iterate over all rev in this revlog (from start to stop)"""
887 """iterate over all rev in this revlog (from start to stop)"""
888 return storageutil.iterrevs(len(self), start=start, stop=stop)
888 return storageutil.iterrevs(len(self), start=start, stop=stop)
889
889
890 def hasnode(self, node):
890 def hasnode(self, node):
891 try:
891 try:
892 self.rev(node)
892 self.rev(node)
893 return True
893 return True
894 except KeyError:
894 except KeyError:
895 return False
895 return False
896
896
    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents), making the delta incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

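    # Illustrative usage (an added sketch, not part of the original file):
    # when the rank is persisted, it should agree with a direct count of
    # ``ancestors(rev)``, ``rev`` included. Assuming ``rl`` is a
    # changelog-v2 revlog and ``rev`` a valid revision:
    #
    #   def slow_rank(rl, rev):
    #       # count ancestors(rev) plus rev itself by walking the graph
    #       return 1 + sum(1 for _ in rl.ancestors([rev]))
    #
    #   assert rl.fast_rank(rev) in (None, slow_rank(rl, rev))
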
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

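    # Illustrative note (an added sketch, not part of the original file):
    # index entry [3] is the delta-base revision; an entry whose base is
    # itself stores a full snapshot, which is exactly where the walk above
    # stops:
    #
    #   base = rl.chainbase(rev)
    #   assert rl.index[base][3] == base  # the base is stored as a snapshot
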
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # (compare against nullrev, matching parentrevs above)
        if self.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

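    # Illustrative usage (an added sketch, not part of the original file;
    # ``rl`` is assumed to be a revlog with at least one revision):
    #
    #   chain, stopped = rl._deltachain(rl.tiprev())
    #   assert not stopped                            # no stoprev was given
    #   assert chain[0] == rl.chainbase(rl.tiprev())  # starts at the base
    #   assert chain[-1] == rl.tiprev()               # ends at the revision
    #   assert len(chain) == rl.chainlen(rl.tiprev()) + 1
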
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

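    # Illustrative usage (an added sketch, not part of the original file):
    # ancestors() yields revs strictly below the starting set unless
    # ``inclusive`` is passed, and never yields revs below ``stoprev``:
    #
    #   ancs = set(rl.ancestors([rev], inclusive=True))
    #   assert rev in ancs
    #   assert all(a <= rev for a in ancs)
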
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

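    # Illustrative note (an added sketch, not part of the original file):
    # the second return value corresponds to the revset
    # (::heads) - (::common), so by construction no missing node is an
    # ancestor of common:
    #
    #   has, missing = rl.findcommonmissing(common, heads)
    #   assert all(rl.rev(n) not in has for n in missing)
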
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

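    # Illustrative usage (an added sketch, not part of the original file;
    # ``c1``, ``c2``, ``h1``, ``h2`` are assumed revision numbers):
    #
    #   inc = rl.incrementalmissingrevs(common=[c1, c2])
    #   missing = inc.missingancestors([h1, h2])  # sorted revision numbers
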
    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is; real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

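    # Illustrative usage (an added sketch, not part of the original file;
    # ``rootnode`` and ``headnode`` are assumed to be connected nodes):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([rootnode], [headnode])
    #   # 'nodes' runs from the reachable roots up to the reachable heads
    #   assert nodes[0] in outroots
    #   assert nodes[-1] in outheads
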
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

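    # Illustrative note (added for exposition, not part of the original
    # file): the array above has ``count + 1`` slots so that writes through
    # a nullrev (-1) parent wrap around to the extra last slot, which starts
    # at 0 and can therefore never be reported as a head.
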
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

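    # Illustrative note (an added sketch, not part of the original file):
    # isancestorrev() leans on the fact that, in an append-only revlog, an
    # ancestor always has a smaller revision number than its descendants,
    # so ``a > b`` can bail out before any graph walk. The final
    # reachableroots() call then asks whether ``a`` is reachable from ``b``:
    #
    #   assert rl.isancestorrev(rl.parentrevs(rev)[0], rev)
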
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

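    # Illustrative note (an added sketch, not part of the original file):
    # when several candidate ancestors tie, min() over the binary node ids
    # is arbitrary but deterministic, so every client picks the same one:
    #
    #   assert rl.ancestor(n1, n2) == rl.ancestor(n2, n1)
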
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

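    # Illustrative usage (an added sketch, not part of the original file):
    # all of these resolve to the same binary node for revision 0:
    #
    #   n = rl.node(0)
    #   assert rl.lookup(0) == n             # revision number
    #   assert rl.lookup(b'0') == n          # str(revision number)
    #   assert rl.lookup(hex(n)[:12]) == n   # unambiguous hex prefix
    #   # (the prefix case assumes the prefix is not itself a valid
    #   # revision number, which _match would resolve first)
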
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

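    # Illustrative note (an added sketch, not part of the original file):
    # because a node is, roughly, the SHA-1 of the sorted parents plus the
    # revision text, comparing hashes avoids reconstructing the stored
    # revision at all:
    #
    #   changed = rl.cmp(n, candidate_text)  # no delta-chain replay needed
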
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

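    # Note on the inline adjustment above: in an inline revlog the data of
    # revision r is interleaved with the index and preceded by the r + 1
    # index entries for revisions 0..r, hence the (rev + 1) * entry_size
    # offsets.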
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

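    # The speed-up over repeated _chunk() calls comes from issuing one large
    # read per group and slicing per-revision buffers out of it. With sparse
    # reads enabled, slicechunk splits the requested revisions into groups
    # dense enough to read in one go; targetsize bounds how much extra data
    # a single read may fetch compared to what is actually used.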
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

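    # With general delta, the base field of the index entry is the actual
    # delta parent; in the legacy layout a delta always applies to the
    # previous revision and the base field only marks the start of the chain.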
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

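    # In other words, a snapshot is stored either as a full text (delta base
    # is nullrev or the revision itself) or as a delta against another
    # snapshot, never against one of its parents; the loops above skip empty
    # deltas so that a delta against an "effective" parent is still
    # recognized.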
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

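    # When rev2 is already stored as a delta against rev1, the stored chunk
    # can be returned as-is, avoiding both text reconstructions and a fresh
    # diff computation.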
    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

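    # `raw=True` returns the text exactly as stored (after decompression and
    # delta application), while the default path also runs the registered
    # flag processors, which may transform the stored text (censoring and
    # LFS are implemented this way), and verifies the node hash when asked.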
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

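    # The raw text is rebuilt by taking the base text of the delta chain and
    # applying every remaining delta in order (mdiff.patches). When the
    # cached revision lies on the chain, _deltachain stops there and the
    # cached text is reused as the base, shortening the chain walk.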
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

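    # storageutil.hashrevisionsha1 hashes the two parent nodes in sorted
    # order followed by the text, so the resulting node id does not depend
    # on the order in which the parents are passed.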
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds an '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

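    # For example, a radix of b'data/some/file' maps to b'data-s/some/file.i',
    # while a root-level radix such as b'00changelog' maps to
    # b'00changelog.i.s'.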
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

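        # What follows: close any cached write handle, copy every revision's
        # data out of the inline index into a fresh data file, then rewrite
        # the index without FLAG_INLINE_DATA; with side_write the new index
        # goes to a side file that is renamed into place when the transaction
        # is finalized.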
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

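    # Illustrative usage (``rl`` stands for any revlog instance): keeping the
    # underlying files open across many reads avoids reopening them for each
    # revision:
    #
    #     with rl.reading():
    #         for rev in rl:
    #             rl.revision(rev)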
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

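    # The (header, data) pairs above follow decompress()'s convention: an
    # empty header means the payload already carries its own compression
    # marker (e.g. zlib output starts with 'x'), b'u' flags data stored
    # uncompressed, and data whose first byte is '\0' needs no marker since
    # a leading NUL is returned as-is on read.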
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors;
            # this is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different from ones we manually add.
            sidedata_offset = 0

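        # The rank of a revision is the number of its ancestors, itself
        # included. Linear changes derive it from the parent's rank; merges
        # also need the ancestors that only the second parent contributes,
        # computed natively by rustdagop when available.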
2684 rank = RANK_UNKNOWN
2682 rank = RANK_UNKNOWN
2685 if self._compute_rank:
2683 if self._compute_rank:
2686 if (p1r, p2r) == (nullrev, nullrev):
2684 if (p1r, p2r) == (nullrev, nullrev):
2687 rank = 1
2685 rank = 1
2688 elif p1r != nullrev and p2r == nullrev:
2686 elif p1r != nullrev and p2r == nullrev:
2689 rank = 1 + self.fast_rank(p1r)
2687 rank = 1 + self.fast_rank(p1r)
2690 elif p1r == nullrev and p2r != nullrev:
2688 elif p1r == nullrev and p2r != nullrev:
2691 rank = 1 + self.fast_rank(p2r)
2689 rank = 1 + self.fast_rank(p2r)
2692 else: # merge node
2690 else: # merge node
2693 if rustdagop is not None and self.index.rust_ext_compat:
2691 if rustdagop is not None and self.index.rust_ext_compat:
2694 rank = rustdagop.rank(self.index, p1r, p2r)
2692 rank = rustdagop.rank(self.index, p1r, p2r)
2695 else:
2693 else:
2696 pmin, pmax = sorted((p1r, p2r))
2694 pmin, pmax = sorted((p1r, p2r))
2697 rank = 1 + self.fast_rank(pmax)
2695 rank = 1 + self.fast_rank(pmax)
2698 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2696 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2699
2697
2700 e = revlogutils.entry(
2698 e = revlogutils.entry(
2701 flags=flags,
2699 flags=flags,
2702 data_offset=offset,
2700 data_offset=offset,
2703 data_compressed_length=deltainfo.deltalen,
2701 data_compressed_length=deltainfo.deltalen,
2704 data_uncompressed_length=textlen,
2702 data_uncompressed_length=textlen,
2705 data_compression_mode=compression_mode,
2703 data_compression_mode=compression_mode,
2706 data_delta_base=deltainfo.base,
2704 data_delta_base=deltainfo.base,
2707 link_rev=link,
2705 link_rev=link,
2708 parent_rev_1=p1r,
2706 parent_rev_1=p1r,
2709 parent_rev_2=p2r,
2707 parent_rev_2=p2r,
2710 node_id=node,
2708 node_id=node,
2711 sidedata_offset=sidedata_offset,
2709 sidedata_offset=sidedata_offset,
2712 sidedata_compressed_length=len(serialized_sidedata),
2710 sidedata_compressed_length=len(serialized_sidedata),
2713 sidedata_compression_mode=sidedata_compression_mode,
2711 sidedata_compression_mode=sidedata_compression_mode,
2714 rank=rank,
2712 rank=rank,
2715 )
2713 )
2716
2714
2717 self.index.append(e)
2715 self.index.append(e)
2718 entry = self.index.entry_binary(curr)
2716 entry = self.index.entry_binary(curr)
2719 if curr == 0 and self._docket is None:
2717 if curr == 0 and self._docket is None:
2720 header = self._format_flags | self._format_version
2718 header = self._format_flags | self._format_version
2721 header = self.index.pack_header(header)
2719 header = self.index.pack_header(header)
2722 entry = header + entry
2720 entry = header + entry
2723 self._writeentry(
2721 self._writeentry(
2724 transaction,
2722 transaction,
2725 entry,
2723 entry,
2726 deltainfo.data,
2724 deltainfo.data,
2727 link,
2725 link,
2728 offset,
2726 offset,
2729 serialized_sidedata,
2727 serialized_sidedata,
2730 sidedata_offset,
2728 sidedata_offset,
2731 )
2729 )
2732
2730
2733 rawtext = btext[0]
2731 rawtext = btext[0]
2734
2732
2735 if alwayscache and rawtext is None:
2733 if alwayscache and rawtext is None:
2736 rawtext = deltacomputer.buildtext(revinfo)
2734 rawtext = deltacomputer.buildtext(revinfo)
2737
2735
2738 if type(rawtext) == bytes: # only accept immutable objects
2736 if type(rawtext) == bytes: # only accept immutable objects
2739 self._revisioncache = (node, curr, rawtext)
2737 self._revisioncache = (node, curr, rawtext)
2740 self._chainbasecache[curr] = deltainfo.chainbase
2738 self._chainbasecache[curr] = deltainfo.chainbase
2741 return curr
2739 return curr
2742
2740
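    # The merge branch above relies on the identity
    #   rank(m) = 1 + rank(pmax) + |ancestors(pmin) - ancestors(pmax)|
    # where rank(r) is the number of revisions in r's ancestry, r included.
    # A minimal sketch of that identity on a toy DAG (illustration only, not
    # part of this module; ``parents`` is assumed to map rev -> (p1, p2),
    # with nullrev spelled -1):
    #
    #     def toy_rank(parents, r):
    #         seen, stack = set(), [r]
    #         while stack:
    #             n = stack.pop()
    #             if n != -1 and n not in seen:
    #                 seen.add(n)
    #                 stack.extend(parents[n])
    #         return len(seen)
    #
    #     parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
    #     assert toy_rank(parents, 3) == 4  # {0, 1, 2, 3}
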
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

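    # In the inline branch above, index entries and revision chunks are
    # interleaved in the ".i" file, so the byte position of revision ``curr``
    # is its data offset plus one index entry per prior revision. A sketch of
    # that layout arithmetic (the chunk sizes below are made-up values for
    # illustration; 64 bytes is the v1 index entry size):
    #
    #     entry_size = 64
    #     chunk_lens = [10, 7, 12]  # compressed chunk sizes for revs 0..2
    #     data_offset = sum(chunk_lens[:2])        # rev 2's data offset: 17
    #     file_pos = data_offset + 2 * entry_size  # where rev 2 is written
    #     assert file_pos == 17 + 128
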
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        Add a delta group.

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

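    # A sketch of what a hypothetical caller would feed addgroup(), matching
    # the 8-tuple unpacked in the loop above (``rl`` is a revlog, ``tr`` an
    # open transaction, ``clog`` a changelog; the nodes and deltas are
    # placeholders, not real values):
    #
    #     deltas = [
    #         # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #         (node1, p1, p2, linknode1, base1, delta1, 0, {}),
    #         (node2, node1, nullid, linknode2, node1, delta2, 0, {}),
    #     ]
    #     added = rl.addgroup(
    #         iter(deltas),
    #         linkmapper=lambda n: clog.rev(n),  # map linknode -> linkrev
    #         transaction=tr,
    #     )
    #     # True if at least one revision was added or already known;
    #     # False only for an empty group
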
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

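    # The censored-base check in addgroup() above insists that a delta built
    # on a censored revision is a single full-replacement hunk. A sketch of
    # what such a delta looks like, assuming mdiff.replacediffheader packs a
    # (start, end, newlength) hunk header as ">lll" with start == 0 and
    # end == oldlen (full_replacement_delta is a hypothetical helper):
    #
    #     import struct
    #
    #     def full_replacement_delta(oldlen, newtext):
    #         header = struct.pack(b">lll", 0, oldlen, len(newtext))
    #         return header + newtext
    #
    #     delta = full_replacement_delta(5, b"other")
    #     hlen = struct.calcsize(b">lll")
    #     assert len(delta) - hlen == 5  # the "newlen" computed above
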
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

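    # The truncation points computed in strip() above, in one place: a split
    # revlog cuts the index and data files independently, while an inline
    # revlog keeps both in ".i" and cuts once. Toy numbers for illustration
    # only:
    #
    #     entry_size = 64
    #     rev = 3          # first revision being stripped
    #     data_end = 1024  # self.start(rev): start of rev 3's data chunk
    #     # split revlog: ".d" is cut at data_end, ".i" is cut at:
    #     index_end = rev * entry_size              # 192
    #     # inline revlog: the single ".i" is cut at:
    #     inline_end = data_end + rev * entry_size  # 1216
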
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

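    # A sketch of how a consumer might interpret checksize(); compare
    # verifyintegrity() below, which turns the same tuple into problem
    # reports (``rl`` is a revlog instance, assumed for the example):
    #
    #     dd, di = rl.checksize()
    #     if (dd, di) == (0, 0):
    #         print("revlog is healthy")
    #     else:
    #         print("data off by %d bytes, index has %d extra" % (dd, di))
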
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

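    # A sketch of consuming emitrevisions(); the yielded objects implement
    # the ``irevisiondelta`` interface (node, p1node, p2node, basenode, and
    # delta/revision payloads). send_delta and send_full are placeholders
    # standing in for whatever the caller does with each entry:
    #
    #     for rdelta in rl.emitrevisions(nodes, revisiondata=True):
    #         if rdelta.delta is not None:
    #             send_delta(rdelta.node, rdelta.basenode, rdelta.delta)
    #         else:
    #             send_full(rdelta.node, rdelta.revision)
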
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When unset, the destination revlog's existing setting is
        kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            with self.reading():
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

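    # A sketch of using clone() to recompute every delta during a storage
    # upgrade (``tr`` is an open transaction, ``dest`` an empty destination
    # revlog, and ``progress`` a placeholder progress tracker; all three are
    # assumptions of this example, not part of this module):
    #
    #     src.clone(
    #         tr,
    #         dest,
    #         deltareuse=src.DELTAREUSENEVER,  # rebuild all deltas
    #         addrevisioncb=lambda rl, rev, node: progress.increment(),
    #     )
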
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

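    # A sketch of draining verifyintegrity(), in the spirit of "hg verify";
    # the ``state`` keys shown are the ones the generator above reads, and
    # ``report`` is a placeholder for the caller's error sink:
    #
    #     state = {b'expectedversion': 1, b'erroroncensored': True}
    #     errors = 0
    #     for problem in rl.verifyintegrity(state):
    #         if problem.error is not None:
    #             errors += 1
    #             report(problem.error, problem.node)
    #         elif problem.warning is not None:
    #             report(problem.warning, problem.node)
    #     ok = errors == 0
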
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

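    # A sketch of querying storageinfo() for space accounting; each key is
    # present only when the matching flag is passed, as the code above shows:
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     print(b'%d revisions in %d bytes'
    #           % (info[b'revisionscount'], info[b'storedsize']))
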
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
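
    # rewrite_sidedata() receives ``helpers`` as the 3-tuple built by
    # revlogutils/sidedata.get_sidedata_helpers, roughly (repo, computers,
    # removers); the early return above fires when both maps are empty. A
    # sketch of the shape this code relies on (the exact value types are an
    # assumption here; see that helper's docstring for the authoritative
    # layout):
    #
    #     computers = {}  # revlog kind -> sidedata computers to run
    #     removers = {}   # revlog kind -> sidedata categories to drop
    #     helpers = (repo, computers, removers)
    #     if not helpers[1] and not helpers[2]:
    #         pass  # nothing to generate or remove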