revlog: move censor logic out of censor extension...
Gregory Szorc - r39814:a6b3c4c1 default
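In outline, this commit replaces the censor extension's hand-rolled revlog rewriting (removed below) with a storage-level ``censorrevision()`` API. A minimal sketch of the new call path, using only names that appear in the diff (locking and flow as shown in ``_docensor``)::

    with repo.wlock(), repo.lock():
        flog = repo.file(path)                      # filelog for the target file
        fnode = repo[rev].filectx(path).filenode()  # filenode to censor
        with repo.transaction(b'censor') as tr:
            # Storage now owns the delta rewriting; callers no longer
            # touch index/data files directly.
            flog.censorrevision(tr, fnode, tombstone=tombstone)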
diff --git a/hgext/censor.py b/hgext/censor.py
@@ -1,187 +1,99 @@
1 1 # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
2 2 #
3 3 # This extension enables removal of file content at a given revision,
4 4 # rewriting the data/metadata of successive revisions to preserve revision log
5 5 # integrity.
6 6
7 7 """erase file content at a given revision
8 8
9 9 The censor command instructs Mercurial to erase all content of a file at a given
10 10 revision *without updating the changeset hash.* This allows existing history to
11 11 remain valid while preventing future clones/pulls from receiving the erased
12 12 data.
13 13
14 14 Typical uses for censor are due to security or legal requirements, including::
15 15
16 16 * Passwords, private keys, cryptographic material
17 17 * Licensed data/code/libraries for which the license has expired
18 18 * Personally Identifiable Information or other private data
19 19
20 20 Censored nodes can interrupt Mercurial's typical operation whenever the excised
21 21 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
22 22 simply fail when asked to produce censored data. Others, like ``hg verify`` and
23 23 ``hg update``, must be capable of tolerating censored data to continue to
24 24 function in a meaningful way. Such commands only tolerate censored file
25 25 revisions if they are allowed by the "censor.policy=ignore" config option.
26 26 """
27 27
28 28 from __future__ import absolute_import
29 29
30 30 from mercurial.i18n import _
31 31 from mercurial.node import short
32 32
33 33 from mercurial import (
34 34 error,
35 pycompat,
36 35 registrar,
37 revlog,
38 36 scmutil,
39 util,
40 37 )
41 38
42 39 cmdtable = {}
43 40 command = registrar.command(cmdtable)
44 41 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
45 42 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
46 43 # be specifying the version(s) of Mercurial they are tested with, or
47 44 # leave the attribute unspecified.
48 45 testedwith = 'ships-with-hg-core'
49 46
50 47 @command('censor',
51 48 [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
52 49 ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
53 50 _('-r REV [-t TEXT] [FILE]'))
54 51 def censor(ui, repo, path, rev='', tombstone='', **opts):
55 52 with repo.wlock(), repo.lock():
56 53 return _docensor(ui, repo, path, rev, tombstone, **opts)
57 54
58 55 def _docensor(ui, repo, path, rev='', tombstone='', **opts):
59 56 if not path:
60 57 raise error.Abort(_('must specify file path to censor'))
61 58 if not rev:
62 59 raise error.Abort(_('must specify revision to censor'))
63 60
64 61 wctx = repo[None]
65 62
66 63 m = scmutil.match(wctx, (path,))
67 64 if m.anypats() or len(m.files()) != 1:
68 65 raise error.Abort(_('can only specify an explicit filename'))
69 66 path = m.files()[0]
70 67 flog = repo.file(path)
71 68 if not len(flog):
72 69 raise error.Abort(_('cannot censor file with no history'))
73 70
74 71 rev = scmutil.revsingle(repo, rev, rev).rev()
75 72 try:
76 73 ctx = repo[rev]
77 74 except KeyError:
78 75 raise error.Abort(_('invalid revision identifier %s') % rev)
79 76
80 77 try:
81 78 fctx = ctx.filectx(path)
82 79 except error.LookupError:
83 80 raise error.Abort(_('file does not exist at revision %s') % rev)
84 81
85 82 fnode = fctx.filenode()
86 83 heads = []
87 84 for headnode in repo.heads():
88 85 hc = repo[headnode]
89 86 if path in hc and hc.filenode(path) == fnode:
90 87 heads.append(hc)
91 88 if heads:
92 89 headlist = ', '.join([short(c.node()) for c in heads])
93 90 raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
94 91 hint=_('clean/delete and commit first'))
95 92
96 93 wp = wctx.parents()
97 94 if ctx.node() in [p.node() for p in wp]:
98 95 raise error.Abort(_('cannot censor working directory'),
99 96 hint=_('clean/delete/update first'))
100 97
101 flogv = flog.version & 0xFFFF
102 if flogv != revlog.REVLOGV1:
103 raise error.Abort(
104 _('censor does not support revlog version %d') % (flogv,))
105
106 tombstone = revlog.packmeta({"censored": tombstone}, "")
107
108 crev = fctx.filerev()
109
110 if len(tombstone) > flog.rawsize(crev):
111 raise error.Abort(_(
112 'censor tombstone must be no longer than censored data'))
113
114 # Using two files instead of one makes it easy to rewrite entry-by-entry
115 idxread = repo.svfs(flog.indexfile, 'r')
116 idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
117 if flog.version & revlog.FLAG_INLINE_DATA:
118 dataread, datawrite = idxread, idxwrite
119 else:
120 dataread = repo.svfs(flog.datafile, 'r')
121 datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
122
123 # Copy all revlog data up to the entry to be censored.
124 rio = revlog.revlogio()
125 offset = flog.start(crev)
126
127 for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
128 idxwrite.write(chunk)
129 for chunk in util.filechunkiter(dataread, limit=offset):
130 datawrite.write(chunk)
131
132 def rewriteindex(r, newoffs, newdata=None):
133 """Rewrite the index entry with a new data offset and optional new data.
134
135 The newdata argument, if given, is a tuple of three positive integers:
136 (new compressed, new uncompressed, added flag bits).
137 """
138 offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
139 flags = revlog.gettype(offlags)
140 if newdata:
141 comp, uncomp, nflags = newdata
142 flags |= nflags
143 offlags = revlog.offset_type(newoffs, flags)
144 e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
145 idxwrite.write(rio.packentry(e, None, flog.version, r))
146 idxread.seek(rio.size, 1)
147
148 def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
149 """Write the given full text to the filelog with the given data offset.
150
151 Returns:
152 The integer number of data bytes written, for tracking data offsets.
153 """
154 flag, compdata = flog.compress(data)
155 newcomp = len(flag) + len(compdata)
156 rewriteindex(r, offs, (newcomp, len(data), nflags))
157 datawrite.write(flag)
158 datawrite.write(compdata)
159 dataread.seek(flog.length(r), 1)
160 return newcomp
161
162 # Rewrite censored revlog entry with (padded) tombstone data.
163 pad = ' ' * (flog.rawsize(crev) - len(tombstone))
164 offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
165
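Because the censored entry is rewritten in place, the tombstone is space-padded to exactly the raw size of the original revision. In miniature, with a made-up size::

    rawsize = 64                            # hypothetical flog.rawsize(crev)
    tombstone = '\1\ncensored: gone\n\1\n'  # packmeta-framed tombstone
    assert len(tombstone) <= rawsize        # enforced by the Abort above
    padded = tombstone + ' ' * (rawsize - len(tombstone))
    assert len(padded) == rawsize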
166 # Rewrite all following filelog revisions fixing up offsets and deltas.
167 for srev in pycompat.xrange(crev + 1, len(flog)):
168 if crev in flog.parentrevs(srev):
169 # Immediate children of censored node must be re-added as fulltext.
170 try:
171 revdata = flog.revision(srev)
172 except error.CensoredNodeError as e:
173 revdata = e.tombstone
174 dlen = rewrite(srev, offset, revdata)
175 else:
176 # Copy any other revision data verbatim after fixing up the offset.
177 rewriteindex(srev, offset)
178 dlen = flog.length(srev)
179 for chunk in util.filechunkiter(dataread, limit=dlen):
180 datawrite.write(chunk)
181 offset += dlen
182
183 idxread.close()
184 idxwrite.close()
185 if dataread is not idxread:
186 dataread.close()
187 datawrite.close()
98 with repo.transaction(b'censor') as tr:
99 flog.censorrevision(tr, fnode, tombstone=tombstone)
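The removed code framed the tombstone as filelog metadata via ``revlog.packmeta`` (presumably the storage layer now applies the same framing internally). A hand-rolled approximation of that framing, for illustration only::

    def packmeta_sketch(meta, text):
        # Metadata is a sorted 'key: value\n' block wrapped in \1\n
        # markers and prepended to the file text.
        metatext = ''.join('%s: %s\n' % (k, meta[k]) for k in sorted(meta))
        return '\1\n%s\1\n%s' % (metatext, text)

    tombstone = packmeta_sketch({'censored': 'removed'}, '')  # placeholder message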
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
@@ -1,278 +1,281 @@
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from . import (
11 11 error,
12 12 repository,
13 13 revlog,
14 14 )
15 15 from .utils import (
16 16 interfaceutil,
17 17 )
18 18
19 19 @interfaceutil.implementer(repository.ifilestorage)
20 20 class filelog(object):
21 21 def __init__(self, opener, path):
22 22 self._revlog = revlog.revlog(opener,
23 23 '/'.join(('data', path + '.i')),
24 24 censorable=True)
25 25 # full name of the user visible file, relative to the repository root
26 26 self.filename = path
27 27 self.index = self._revlog.index
28 28 self.version = self._revlog.version
29 29 self._generaldelta = self._revlog._generaldelta
30 30
31 31 def __len__(self):
32 32 return len(self._revlog)
33 33
34 34 def __iter__(self):
35 35 return self._revlog.__iter__()
36 36
37 37 def revs(self, start=0, stop=None):
38 38 return self._revlog.revs(start=start, stop=stop)
39 39
40 40 def parents(self, node):
41 41 return self._revlog.parents(node)
42 42
43 43 def parentrevs(self, rev):
44 44 return self._revlog.parentrevs(rev)
45 45
46 46 def rev(self, node):
47 47 return self._revlog.rev(node)
48 48
49 49 def node(self, rev):
50 50 return self._revlog.node(rev)
51 51
52 52 def lookup(self, node):
53 53 return self._revlog.lookup(node)
54 54
55 55 def linkrev(self, rev):
56 56 return self._revlog.linkrev(rev)
57 57
58 58 def flags(self, rev):
59 59 return self._revlog.flags(rev)
60 60
61 61 def commonancestorsheads(self, node1, node2):
62 62 return self._revlog.commonancestorsheads(node1, node2)
63 63
64 64 def descendants(self, revs):
65 65 return self._revlog.descendants(revs)
66 66
67 67 def headrevs(self):
68 68 return self._revlog.headrevs()
69 69
70 70 def heads(self, start=None, stop=None):
71 71 return self._revlog.heads(start, stop)
72 72
73 73 def children(self, node):
74 74 return self._revlog.children(node)
75 75
76 76 def deltaparent(self, rev):
77 77 return self._revlog.deltaparent(rev)
78 78
79 79 def iscensored(self, rev):
80 80 return self._revlog.iscensored(rev)
81 81
82 82 def rawsize(self, rev):
83 83 return self._revlog.rawsize(rev)
84 84
85 85 def checkhash(self, text, node, p1=None, p2=None, rev=None):
86 86 return self._revlog.checkhash(text, node, p1=p1, p2=p2, rev=rev)
87 87
88 88 def revision(self, node, _df=None, raw=False):
89 89 return self._revlog.revision(node, _df=_df, raw=raw)
90 90
91 91 def revdiff(self, rev1, rev2):
92 92 return self._revlog.revdiff(rev1, rev2)
93 93
94 94 def emitrevisiondeltas(self, requests):
95 95 return self._revlog.emitrevisiondeltas(requests)
96 96
97 97 def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
98 98 node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
99 99 cachedelta=None):
100 100 return self._revlog.addrevision(revisiondata, transaction, linkrev,
101 101 p1, p2, node=node, flags=flags,
102 102 cachedelta=cachedelta)
103 103
104 104 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
105 105 return self._revlog.addgroup(deltas, linkmapper, transaction,
106 106 addrevisioncb=addrevisioncb)
107 107
108 108 def getstrippoint(self, minlink):
109 109 return self._revlog.getstrippoint(minlink)
110 110
111 111 def strip(self, minlink, transaction):
112 112 return self._revlog.strip(minlink, transaction)
113 113
114 def censorrevision(self, tr, node, tombstone=b''):
115 return self._revlog.censorrevision(node, tombstone=tombstone)
116
114 117 def files(self):
115 118 return self._revlog.files()
116 119
117 120 def checksize(self):
118 121 return self._revlog.checksize()
119 122
120 123 def read(self, node):
121 124 t = self.revision(node)
122 125 if not t.startswith('\1\n'):
123 126 return t
124 127 s = t.index('\1\n', 2)
125 128 return t[s + 2:]
126 129
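``read()`` above strips exactly the framing shown in the ``packmeta`` sketch earlier, by skipping past the second ``\1\n`` marker. A quick roundtrip with a hypothetical copy header::

    framed = '\1\ncopy: old/name.txt\ncopyrev: %s\n\1\nfile text' % ('ab' * 20)
    s = framed.index('\1\n', 2)
    assert framed[s + 2:] == 'file text'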
127 130 def add(self, text, meta, transaction, link, p1=None, p2=None):
128 131 if meta or text.startswith('\1\n'):
129 132 text = revlog.packmeta(meta, text)
130 133 return self.addrevision(text, transaction, link, p1, p2)
131 134
132 135 def renamed(self, node):
133 136 if self.parents(node)[0] != revlog.nullid:
134 137 return False
135 138 t = self.revision(node)
136 139 m = revlog.parsemeta(t)[0]
137 140 # copy and copyrev occur in pairs. In rare cases due to bugs,
138 141 # one can occur without the other.
139 142 if m and "copy" in m and "copyrev" in m:
140 143 return (m["copy"], revlog.bin(m["copyrev"]))
141 144 return False
142 145
143 146 def size(self, rev):
144 147 """return the size of a given revision"""
145 148
146 149 # for revisions with renames, we have to go the slow way
147 150 node = self.node(rev)
148 151 if self.renamed(node):
149 152 return len(self.read(node))
150 153 if self.iscensored(rev):
151 154 return 0
152 155
153 156 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
154 157 return self._revlog.size(rev)
155 158
156 159 def cmp(self, node, text):
157 160 """compare text with a given file revision
158 161
159 162 returns True if text is different than what is stored.
160 163 """
161 164
162 165 t = text
163 166 if text.startswith('\1\n'):
164 167 t = '\1\n\1\n' + text
165 168
166 169 samehashes = not self._revlog.cmp(node, t)
167 170 if samehashes:
168 171 return False
169 172
170 173 # censored files compare against the empty file
171 174 if self.iscensored(self.rev(node)):
172 175 return text != ''
173 176
174 177 # renaming a file produces a different hash, even if the data
175 178 # remains unchanged. Check if it's the case (slow):
176 179 if self.renamed(node):
177 180 t2 = self.read(node)
178 181 return t2 != text
179 182
180 183 return True
181 184
182 185 @property
183 186 def filename(self):
184 187 return self._revlog.filename
185 188
186 189 @filename.setter
187 190 def filename(self, value):
188 191 self._revlog.filename = value
189 192
190 193 # TODO these aren't part of the interface and aren't internal methods.
191 194 # Callers should be fixed to not use them.
192 195 @property
193 196 def indexfile(self):
194 197 return self._revlog.indexfile
195 198
196 199 @indexfile.setter
197 200 def indexfile(self, value):
198 201 self._revlog.indexfile = value
199 202
200 203 @property
201 204 def datafile(self):
202 205 return self._revlog.datafile
203 206
204 207 @property
205 208 def opener(self):
206 209 return self._revlog.opener
207 210
208 211 def clone(self, tr, destrevlog, **kwargs):
209 212 if not isinstance(destrevlog, filelog):
210 213 raise error.ProgrammingError('expected filelog to clone()')
211 214
212 215 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
213 216
214 217 def start(self, rev):
215 218 return self._revlog.start(rev)
216 219
217 220 def end(self, rev):
218 221 return self._revlog.end(rev)
219 222
220 223 def length(self, rev):
221 224 return self._revlog.length(rev)
222 225
223 226 def compress(self, data):
224 227 return self._revlog.compress(data)
225 228
226 229 def _addrevision(self, *args, **kwargs):
227 230 return self._revlog._addrevision(*args, **kwargs)
228 231
229 232 class narrowfilelog(filelog):
230 233 """Filelog variation to be used with narrow stores."""
231 234
232 235 def __init__(self, opener, path, narrowmatch):
233 236 super(narrowfilelog, self).__init__(opener, path)
234 237 self._narrowmatch = narrowmatch
235 238
236 239 def renamed(self, node):
237 240 res = super(narrowfilelog, self).renamed(node)
238 241
239 242 # Renames that come from outside the narrowspec are problematic
240 243 # because we may lack the base text for the rename. This can result
241 244 # in code attempting to walk the ancestry or compute a diff
242 245 # encountering a missing revision. We address this by silently
243 246 # removing rename metadata if the source file is outside the
244 247 # narrow spec.
245 248 #
246 249 # A better solution would be to see if the base revision is available,
247 250 # rather than assuming it isn't.
248 251 #
249 252 # An even better solution would be to teach all consumers of rename
250 253 # metadata that the base revision may not be available.
251 254 #
252 255 # TODO consider better ways of doing this.
253 256 if res and not self._narrowmatch(res[0]):
254 257 return None
255 258
256 259 return res
257 260
258 261 def size(self, rev):
259 262 # Because we have a custom renamed() that may lie, we need to call
260 263 # the base renamed() to report accurate results.
261 264 node = self.node(rev)
262 265 if super(narrowfilelog, self).renamed(node):
263 266 return len(self.read(node))
264 267 else:
265 268 return super(narrowfilelog, self).size(rev)
266 269
267 270 def cmp(self, node, text):
268 271 different = super(narrowfilelog, self).cmp(node, text)
269 272
270 273 # Because renamed() may lie, we may get false positives for
271 274 # different content. Check for this by comparing against the original
272 275 # renamed() implementation.
273 276 if different:
274 277 if super(narrowfilelog, self).renamed(node):
275 278 t2 = self.read(node)
276 279 return t2 != text
277 280
278 281 return different
diff --git a/mercurial/repository.py b/mercurial/repository.py
@@ -1,1585 +1,1602 @@
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from . import (
12 12 error,
13 13 )
14 14 from .utils import (
15 15 interfaceutil,
16 16 )
17 17
18 18 # When narrowing is finalized and no longer subject to format changes,
19 19 # we should move this to just "narrow" or similar.
20 20 NARROW_REQUIREMENT = 'narrowhg-experimental'
21 21
22 22 class ipeerconnection(interfaceutil.Interface):
23 23 """Represents a "connection" to a repository.
24 24
25 25 This is the base interface for representing a connection to a repository.
26 26 It holds basic properties and methods applicable to all peer types.
27 27
28 28 This is not a complete interface definition and should not be used
29 29 outside of this module.
30 30 """
31 31 ui = interfaceutil.Attribute("""ui.ui instance""")
32 32
33 33 def url():
34 34 """Returns a URL string representing this peer.
35 35
36 36 Currently, implementations expose the raw URL used to construct the
37 37 instance. It may contain credentials as part of the URL. The
38 38 expectations of the value aren't well-defined and this could lead to
39 39 data leakage.
40 40
41 41 TODO audit/clean consumers and more clearly define the contents of this
42 42 value.
43 43 """
44 44
45 45 def local():
46 46 """Returns a local repository instance.
47 47
48 48 If the peer represents a local repository, returns an object that
49 49 can be used to interface with it. Otherwise returns ``None``.
50 50 """
51 51
52 52 def peer():
53 53 """Returns an object conforming to this interface.
54 54
55 55 Most implementations will ``return self``.
56 56 """
57 57
58 58 def canpush():
59 59 """Returns a boolean indicating if this peer can be pushed to."""
60 60
61 61 def close():
62 62 """Close the connection to this peer.
63 63
64 64 This is called when the peer will no longer be used. Resources
65 65 associated with the peer should be cleaned up.
66 66 """
67 67
68 68 class ipeercapabilities(interfaceutil.Interface):
69 69 """Peer sub-interface related to capabilities."""
70 70
71 71 def capable(name):
72 72 """Determine support for a named capability.
73 73
74 74 Returns ``False`` if capability not supported.
75 75
76 76 Returns ``True`` if boolean capability is supported. Returns a string
77 77 if capability support is non-boolean.
78 78
79 79 Capability strings may or may not map to wire protocol capabilities.
80 80 """
81 81
82 82 def requirecap(name, purpose):
83 83 """Require a capability to be present.
84 84
85 85 Raises a ``CapabilityError`` if the capability isn't present.
86 86 """
87 87
88 88 class ipeercommands(interfaceutil.Interface):
89 89 """Client-side interface for communicating over the wire protocol.
90 90
91 91 This interface is used as a gateway to the Mercurial wire protocol.
92 92 methods commonly call wire protocol commands of the same name.
93 93 """
94 94
95 95 def branchmap():
96 96 """Obtain heads in named branches.
97 97
98 98 Returns a dict mapping branch name to an iterable of nodes that are
99 99 heads on that branch.
100 100 """
101 101
102 102 def capabilities():
103 103 """Obtain capabilities of the peer.
104 104
105 105 Returns a set of string capabilities.
106 106 """
107 107
108 108 def clonebundles():
109 109 """Obtains the clone bundles manifest for the repo.
110 110
111 111 Returns the manifest as unparsed bytes.
112 112 """
113 113
114 114 def debugwireargs(one, two, three=None, four=None, five=None):
115 115 """Used to facilitate debugging of arguments passed over the wire."""
116 116
117 117 def getbundle(source, **kwargs):
118 118 """Obtain remote repository data as a bundle.
119 119
120 120 This command is how the bulk of repository data is transferred from
121 121 the peer to the local repository.
122 122
123 123 Returns a generator of bundle data.
124 124 """
125 125
126 126 def heads():
127 127 """Determine all known head revisions in the peer.
128 128
129 129 Returns an iterable of binary nodes.
130 130 """
131 131
132 132 def known(nodes):
133 133 """Determine whether multiple nodes are known.
134 134
135 135 Accepts an iterable of nodes whose presence to check for.
136 136
137 137 Returns an iterable of booleans indicating whether the corresponding node
138 138 at that index is known to the peer.
139 139 """
140 140
141 141 def listkeys(namespace):
142 142 """Obtain all keys in a pushkey namespace.
143 143
144 144 Returns an iterable of key names.
145 145 """
146 146
147 147 def lookup(key):
148 148 """Resolve a value to a known revision.
149 149
150 150 Returns a binary node of the resolved revision on success.
151 151 """
152 152
153 153 def pushkey(namespace, key, old, new):
154 154 """Set a value using the ``pushkey`` protocol.
155 155
156 156 Arguments correspond to the pushkey namespace and key to operate on and
157 157 the old and new values for that key.
158 158
159 159 Returns a string with the peer result. The value inside varies by the
160 160 namespace.
161 161 """
162 162
163 163 def stream_out():
164 164 """Obtain streaming clone data.
165 165
166 166 Successful result should be a generator of data chunks.
167 167 """
168 168
169 169 def unbundle(bundle, heads, url):
170 170 """Transfer repository data to the peer.
171 171
172 172 This is how the bulk of data during a push is transferred.
173 173
174 174 Returns the integer number of heads added to the peer.
175 175 """
176 176
177 177 class ipeerlegacycommands(interfaceutil.Interface):
178 178 """Interface for implementing support for legacy wire protocol commands.
179 179
180 180 Wire protocol commands transition to legacy status when they are no longer
181 181 used by modern clients. To facilitate identifying which commands are
182 182 legacy, the interfaces are split.
183 183 """
184 184
185 185 def between(pairs):
186 186 """Obtain nodes between pairs of nodes.
187 187
188 188 ``pairs`` is an iterable of node pairs.
189 189
190 190 Returns an iterable of iterables of nodes corresponding to each
191 191 requested pair.
192 192 """
193 193
194 194 def branches(nodes):
195 195 """Obtain ancestor changesets of specific nodes back to a branch point.
196 196
197 197 For each requested node, the peer finds the first ancestor node that is
198 198 a DAG root or is a merge.
199 199
200 200 Returns an iterable of iterables with the resolved values for each node.
201 201 """
202 202
203 203 def changegroup(nodes, source):
204 204 """Obtain a changegroup with data for descendants of specified nodes."""
205 205
206 206 def changegroupsubset(bases, heads, source):
207 207 pass
208 208
209 209 class ipeercommandexecutor(interfaceutil.Interface):
210 210 """Represents a mechanism to execute remote commands.
211 211
212 212 This is the primary interface for requesting that wire protocol commands
213 213 be executed. Instances of this interface are active in a context manager
214 214 and have a well-defined lifetime. When the context manager exits, all
215 215 outstanding requests are waited on.
216 216 """
217 217
218 218 def callcommand(name, args):
219 219 """Request that a named command be executed.
220 220
221 221 Receives the command name and a dictionary of command arguments.
222 222
223 223 Returns a ``concurrent.futures.Future`` that will resolve to the
224 224 result of that command request. That exact value is left up to
225 225 the implementation and possibly varies by command.
226 226
227 227 Not all commands can coexist with other commands in an executor
228 228 instance: it depends on the underlying wire protocol transport being
229 229 used and the command itself.
230 230
231 231 Implementations MAY call ``sendcommands()`` automatically if the
232 232 requested command can not coexist with other commands in this executor.
233 233
234 234 Implementations MAY call ``sendcommands()`` automatically when the
235 235 future's ``result()`` is called. So, consumers using multiple
236 236 commands with an executor MUST ensure that ``result()`` is not called
237 237 until all command requests have been issued.
238 238 """
239 239
240 240 def sendcommands():
241 241 """Trigger submission of queued command requests.
242 242
243 243 Not all transports submit commands as soon as they are requested to
244 244 run. When called, this method forces queued command requests to be
245 245 issued. It will no-op if all commands have already been sent.
246 246
247 247 When called, no more new commands may be issued with this executor.
248 248 """
249 249
250 250 def close():
251 251 """Signal that this command request is finished.
252 252
253 253 When called, no more new commands may be issued. All outstanding
254 254 commands that have previously been issued are waited on before
255 255 returning. This not only includes waiting for the futures to resolve,
256 256 but also waiting for all response data to arrive. In other words,
257 257 calling this waits for all on-wire state for issued command requests
258 258 to finish.
259 259
260 260 When used as a context manager, this method is called when exiting the
261 261 context manager.
262 262
263 263 This method may call ``sendcommands()`` if there are buffered commands.
264 264 """
265 265
266 266 class ipeerrequests(interfaceutil.Interface):
267 267 """Interface for executing commands on a peer."""
268 268
269 269 def commandexecutor():
270 270 """A context manager that resolves to an ipeercommandexecutor.
271 271
272 272 The object this resolves to can be used to issue command requests
273 273 to the peer.
274 274
275 275 Callers should call its ``callcommand`` method to issue command
276 276 requests.
277 277
278 278 A new executor should be obtained for each distinct set of commands
279 279 (possibly just a single command) that the consumer wants to execute
280 280 as part of a single operation or round trip. This is because some
281 281 peers are half-duplex and/or don't support persistent connections.
282 282 e.g. in the case of HTTP peers, commands sent to an executor represent
283 283 a single HTTP request. While some peers may support multiple command
284 284 sends over the wire per executor, consumers need to code to the least
285 285 capable peer. So it should be assumed that command executors buffer
286 286 called commands until they are told to send them and that each
287 287 command executor could result in a new connection or wire-level request
288 288 being issued.
289 289 """
290 290
291 291 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
292 292 """Unified interface for peer repositories.
293 293
294 294 All peer instances must conform to this interface.
295 295 """
296 296
297 297 @interfaceutil.implementer(ipeerbase)
298 298 class peer(object):
299 299 """Base class for peer repositories."""
300 300
301 301 def capable(self, name):
302 302 caps = self.capabilities()
303 303 if name in caps:
304 304 return True
305 305
306 306 name = '%s=' % name
307 307 for cap in caps:
308 308 if cap.startswith(name):
309 309 return cap[len(name):]
310 310
311 311 return False
312 312
313 313 def requirecap(self, name, purpose):
314 314 if self.capable(name):
315 315 return
316 316
317 317 raise error.CapabilityError(
318 318 _('cannot %s; remote repository does not support the %r '
319 319 'capability') % (purpose, name))
320 320
321 321 class irevisiondelta(interfaceutil.Interface):
322 322 """Represents a delta between one revision and another.
323 323
324 324 Instances convey enough information to allow a revision to be exchanged
325 325 with another repository.
326 326
327 327 Instances represent the fulltext revision data or a delta against
328 328 another revision. Therefore the ``revision`` and ``delta`` attributes
329 329 are mutually exclusive.
330 330
331 331 Typically used for changegroup generation.
332 332 """
333 333
334 334 node = interfaceutil.Attribute(
335 335 """20 byte node of this revision.""")
336 336
337 337 p1node = interfaceutil.Attribute(
338 338 """20 byte node of 1st parent of this revision.""")
339 339
340 340 p2node = interfaceutil.Attribute(
341 341 """20 byte node of 2nd parent of this revision.""")
342 342
343 343 linknode = interfaceutil.Attribute(
344 344 """20 byte node of the changelog revision this node is linked to.""")
345 345
346 346 flags = interfaceutil.Attribute(
347 347 """2 bytes of integer flags that apply to this revision.""")
348 348
349 349 basenode = interfaceutil.Attribute(
350 350 """20 byte node of the revision this data is a delta against.
351 351
352 352 ``nullid`` indicates that the revision is a full revision and not
353 353 a delta.
354 354 """)
355 355
356 356 baserevisionsize = interfaceutil.Attribute(
357 357 """Size of base revision this delta is against.
358 358
359 359 May be ``None`` if ``basenode`` is ``nullid``.
360 360 """)
361 361
362 362 revision = interfaceutil.Attribute(
363 363 """Raw fulltext of revision data for this node.""")
364 364
365 365 delta = interfaceutil.Attribute(
366 366 """Delta between ``basenode`` and ``node``.
367 367
368 368 Stored in the bdiff delta format.
369 369 """)
370 370
371 371 class irevisiondeltarequest(interfaceutil.Interface):
372 372 """Represents a request to generate an ``irevisiondelta``."""
373 373
374 374 node = interfaceutil.Attribute(
375 375 """20 byte node of revision being requested.""")
376 376
377 377 p1node = interfaceutil.Attribute(
378 378 """20 byte node of 1st parent of revision.""")
379 379
380 380 p2node = interfaceutil.Attribute(
381 381 """20 byte node of 2nd parent of revision.""")
382 382
383 383 linknode = interfaceutil.Attribute(
384 384 """20 byte node to store in ``linknode`` attribute.""")
385 385
386 386 basenode = interfaceutil.Attribute(
387 387 """Base revision that delta should be generated against.
388 388
389 389 If ``nullid``, the derived ``irevisiondelta`` should have its
390 390 ``revision`` field populated and no delta should be generated.
391 391
392 392 If ``None``, the delta may be generated against any revision that
393 393 is an ancestor of this revision. Or a full revision may be used.
394 394
395 395 If any other value, the delta should be produced against that
396 396 revision.
397 397 """)
398 398
399 399 ellipsis = interfaceutil.Attribute(
400 400 """Boolean on whether the ellipsis flag should be set.""")
401 401
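A request object only needs the attributes listed above; a bare-bones stand-in (real callers may use ``attrs``-style classes, which are out of scope here)::

    class revisiondeltarequest(object):
        """Minimal object conforming to irevisiondeltarequest."""
        def __init__(self, node, p1node, p2node, linknode,
                     basenode=None, ellipsis=False):
            self.node = node
            self.p1node = p1node
            self.p2node = p2node
            self.linknode = linknode
            self.basenode = basenode   # None lets storage pick the base
            self.ellipsis = ellipsis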
402 402 class ifilerevisionssequence(interfaceutil.Interface):
403 403 """Contains index data for all revisions of a file.
404 404
405 405 Types implementing this behave like lists of tuples. The index
406 406 in the list corresponds to the revision number. The values contain
407 407 index metadata.
408 408
409 409 The *null* revision (revision number -1) is always the last item
410 410 in the index.
411 411 """
412 412
413 413 def __len__():
414 414 """The total number of revisions."""
415 415
416 416 def __getitem__(rev):
417 417 """Returns the object having a specific revision number.
418 418
419 419 Returns an 8-tuple with the following fields:
420 420
421 421 offset+flags
422 422 Contains the offset and flags for the revision. 64-bit unsigned
423 423 integer where first 6 bytes are the offset and the next 2 bytes
424 424 are flags. The offset can be 0 if it is not used by the store.
425 425 compressed size
426 426 Size of the revision data in the store. It can be 0 if it isn't
427 427 needed by the store.
428 428 uncompressed size
429 429 Fulltext size. It can be 0 if it isn't needed by the store.
430 430 base revision
431 431 Revision number of revision the delta for storage is encoded
432 432 against. -1 indicates not encoded against a base revision.
433 433 link revision
434 434 Revision number of changelog revision this entry is related to.
435 435 p1 revision
436 436 Revision number of 1st parent. -1 if no 1st parent.
437 437 p2 revision
438 438 Revision number of 2nd parent. -1 if no 2nd parent.
439 439 node
440 440 Binary node value for this revision number.
441 441
442 442 Negative values should index off the end of the sequence. ``-1``
443 443 should return the null revision. ``-2`` should return the most
444 444 recent revision.
445 445 """
446 446
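The ``offset+flags`` field packs the byte offset into the high 48 bits and the flags into the low 16 bits, mirroring ``revlog.offset_type``/``revlog.gettype`` used in the censor diff above::

    def offset_type(offset, flags):
        return (offset << 16) | flags

    def split(offlags):
        return offlags >> 16, offlags & 0xFFFF

    assert split(offset_type(1234, 0x0002)) == (1234, 0x0002)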
447 447 def __contains__(rev):
448 448 """Whether a revision number exists."""
449 449
450 450 def insert(self, i, entry):
451 451 """Add an item to the index at specific revision."""
452 452
453 453 class ifileindex(interfaceutil.Interface):
454 454 """Storage interface for index data of a single file.
455 455
456 456 File storage data is divided into index metadata and data storage.
457 457 This interface defines the index portion of the interface.
458 458
459 459 The index logically consists of:
460 460
461 461 * A mapping between revision numbers and nodes.
462 462 * DAG data (storing and querying the relationship between nodes).
463 463 * Metadata to facilitate storage.
464 464 """
465 465 index = interfaceutil.Attribute(
466 466 """An ``ifilerevisionssequence`` instance.""")
467 467
468 468 def __len__():
469 469 """Obtain the number of revisions stored for this file."""
470 470
471 471 def __iter__():
472 472 """Iterate over revision numbers for this file."""
473 473
474 474 def revs(start=0, stop=None):
475 475 """Iterate over revision numbers for this file, with control."""
476 476
477 477 def parents(node):
478 478 """Returns a 2-tuple of parent nodes for a revision.
479 479
480 480 Values will be ``nullid`` if the parent is empty.
481 481 """
482 482
483 483 def parentrevs(rev):
484 484 """Like parents() but operates on revision numbers."""
485 485
486 486 def rev(node):
487 487 """Obtain the revision number given a node.
488 488
489 489 Raises ``error.LookupError`` if the node is not known.
490 490 """
491 491
492 492 def node(rev):
493 493 """Obtain the node value given a revision number.
494 494
495 495 Raises ``IndexError`` if the node is not known.
496 496 """
497 497
498 498 def lookup(node):
499 499 """Attempt to resolve a value to a node.
500 500
501 501 Value can be a binary node, hex node, revision number, or a string
502 502 that can be converted to an integer.
503 503
504 504 Raises ``error.LookupError`` if a node could not be resolved.
505 505 """
506 506
507 507 def linkrev(rev):
508 508 """Obtain the changeset revision number a revision is linked to."""
509 509
510 510 def flags(rev):
511 511 """Obtain flags used to affect storage of a revision."""
512 512
513 513 def iscensored(rev):
514 514 """Return whether a revision's content has been censored."""
515 515
516 516 def commonancestorsheads(node1, node2):
517 517 """Obtain an iterable of nodes containing heads of common ancestors.
518 518
519 519 See ``ancestor.commonancestorsheads()``.
520 520 """
521 521
522 522 def descendants(revs):
523 523 """Obtain descendant revision numbers for a set of revision numbers.
524 524
525 525 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
526 526 """
527 527
528 528 def headrevs():
529 529 """Obtain a list of revision numbers that are DAG heads.
530 530
531 531 The list is sorted oldest to newest.
532 532
533 533 TODO determine if sorting is required.
534 534 """
535 535
536 536 def heads(start=None, stop=None):
537 537 """Obtain a list of nodes that are DAG heads, with control.
538 538
539 539 The set of revisions examined can be limited by specifying
540 540 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
541 541 iterable of nodes. DAG traversal starts at earlier revision
542 542 ``start`` and iterates forward until any node in ``stop`` is
543 543 encountered.
544 544 """
545 545
546 546 def children(node):
547 547 """Obtain nodes that are children of a node.
548 548
549 549 Returns a list of nodes.
550 550 """
551 551
552 552 def deltaparent(rev):
553 553 """"Return the revision that is a suitable parent to delta against."""
554 554
555 555 class ifiledata(interfaceutil.Interface):
556 556 """Storage interface for data storage of a specific file.
557 557
558 558 This complements ``ifileindex`` and provides an interface for accessing
559 559 data for a tracked file.
560 560 """
561 561 def rawsize(rev):
562 562 """The size of the fulltext data for a revision as stored."""
563 563
564 564 def size(rev):
565 565 """Obtain the fulltext size of file data.
566 566
567 567 Any metadata is excluded from size measurements. Use ``rawsize()`` if
568 568 metadata size is important.
569 569 """
570 570
571 571 def checkhash(fulltext, node, p1=None, p2=None, rev=None):
572 572 """Validate the stored hash of a given fulltext and node.
573 573
574 574 Raises ``error.StorageError`` if hash validation fails.
575 575 """
576 576
577 577 def revision(node, raw=False):
578 578 """"Obtain fulltext data for a node.
579 579
580 580 By default, any storage transformations are applied before the data
581 581 is returned. If ``raw`` is True, non-raw storage transformations
582 582 are not applied.
583 583
584 584 The fulltext data may contain a header containing metadata. Most
585 585 consumers should use ``read()`` to obtain the actual file data.
586 586 """
587 587
588 588 def read(node):
589 589 """Resolve file fulltext data.
590 590
591 591 This is similar to ``revision()`` except any metadata in the data
592 592 headers is stripped.
593 593 """
594 594
595 595 def renamed(node):
596 596 """Obtain copy metadata for a node.
597 597
598 598 Returns ``False`` if no copy metadata is stored or a 2-tuple of
599 599 (path, node) from which this revision was copied.
600 600 """
601 601
602 602 def cmp(node, fulltext):
603 603 """Compare fulltext to another revision.
604 604
605 605 Returns True if the fulltext is different from what is stored.
606 606
607 607 This takes copy metadata into account.
608 608
609 609 TODO better document the copy metadata and censoring logic.
610 610 """
611 611
612 612 def revdiff(rev1, rev2):
613 613 """Obtain a delta between two revision numbers.
614 614
615 615 Operates on raw data in the store (``revision(node, raw=True)``).
616 616
617 617 The returned data is the result of ``bdiff.bdiff`` on the raw
618 618 revision data.
619 619 """
620 620
621 621 def emitrevisiondeltas(requests):
622 622 """Produce ``irevisiondelta`` from ``irevisiondeltarequest``s.
623 623
624 624 Given an iterable of objects conforming to the ``irevisiondeltarequest``
625 625 interface, emits objects conforming to the ``irevisiondelta``
626 626 interface.
627 627
628 628 This method is a generator.
629 629
630 630 ``irevisiondelta`` should be emitted in the same order of
630 630 ``irevisiondelta`` should be emitted in the same order as the
631 631 ``irevisiondeltarequest``s that were passed in.
632 632
633 633 The emitted objects MUST conform to the results of
635 635 for building a delta from a specific ``basenode`` if defined.
636 636
637 637 When sending deltas, implementations must take into account whether
638 638 the client has the base delta before encoding a delta against that
639 639 revision. A revision encountered previously in ``requests`` is
640 640 always a suitable base revision. An example of a bad delta is a delta
641 641 against a non-ancestor revision. Another example of a bad delta is a
642 642 delta against a censored revision.
643 643 """
644 644
645 645 class ifilemutation(interfaceutil.Interface):
646 646 """Storage interface for mutation events of a tracked file."""
647 647
648 648 def add(filedata, meta, transaction, linkrev, p1, p2):
649 649 """Add a new revision to the store.
650 650
651 651 Takes file data, dictionary of metadata, a transaction, linkrev,
652 652 and parent nodes.
653 653
654 654 Returns the node that was added.
655 655
656 656 May no-op if a revision matching the supplied data is already stored.
657 657 """
658 658
659 659 def addrevision(revisiondata, transaction, linkrev, p1, p2, node=None,
660 660 flags=0, cachedelta=None):
661 661 """Add a new revision to the store.
662 662
663 663 This is similar to ``add()`` except it operates at a lower level.
664 664
665 665 The data passed in already contains a metadata header, if any.
666 666
667 667 ``node`` and ``flags`` can be used to define the expected node and
668 668 the flags to use with storage.
669 669
670 670 ``add()`` is usually called when adding files from e.g. the working
671 671 directory. ``addrevision()`` is often called by ``add()`` and for
672 672 scenarios where revision data has already been computed, such as when
673 673 applying raw data from a peer repo.
674 674 """
675 675
676 676 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
677 677 """Process a series of deltas for storage.
678 678
679 679 ``deltas`` is an iterable of 7-tuples of
680 680 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
681 681 to add.
682 682
683 683 The ``delta`` field contains ``mpatch`` data to apply to a base
684 684 revision, identified by ``deltabase``. The base node can be
685 685 ``nullid``, in which case the header from the delta can be ignored
686 686 and the delta used as the fulltext.
687 687
688 688 ``addrevisioncb`` should be called for each node as it is committed.
689 689
690 690 Returns a list of nodes that were processed. A node will be in the list
691 691 even if it existed in the store previously.
692 692 """
693 693
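Each element of ``deltas`` is the 7-tuple described above. A hypothetical entry whose ``deltabase`` is nullid, so the delta carries the fulltext::

    nullid = b'\0' * 20
    entry = (
        b'\x11' * 20,            # node (placeholder)
        b'\x22' * 20,            # p1
        nullid,                  # p2 (no second parent)
        b'\x33' * 20,            # linknode
        nullid,                  # deltabase: nullid => delta is the fulltext
        b'...delta payload...',  # mpatch data
        0,                       # flags
    )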
694 def censorrevision(tr, node, tombstone=b''):
695 """Remove the content of a single revision.
696
697 The specified ``node`` will have its content purged from storage.
698 Future attempts to access the revision data for this node will
699 result in failure.
700
701 A ``tombstone`` message can optionally be stored. This message may be
702 displayed to users when they attempt to access the missing revision
703 data.
704
705 Storage backends may have stored deltas against the previous content
706 in this revision. As part of censoring a revision, these storage
707 backends are expected to rewrite any internally stored deltas such
708 that they no longer reference the deleted content.
709 """
710
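A non-revlog backend can satisfy this contract far more simply than the revlog rewrite removed above. A toy sketch for a hypothetical fulltext-keyed store::

    class dictstore(object):
        """Illustrative only; not a real Mercurial storage backend."""

        def __init__(self):
            self._fulltexts = {}   # node -> bytes

        def censorrevision(self, tr, node, tombstone=b''):
            # No internally stored deltas to rewrite here: replacing
            # the fulltext is enough to purge the content.
            self._fulltexts[node] = tombstone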
694 711 def getstrippoint(minlink):
695 712 """Find the minimum revision that must be stripped to strip a linkrev.
696 713
697 714 Returns a 2-tuple containing the minimum revision number and a set
698 715 of all revision numbers that would be broken by this strip.
699 716
700 717 TODO this is highly revlog centric and should be abstracted into
701 718 a higher-level deletion API. ``repair.strip()`` relies on this.
702 719 """
703 720
704 721 def strip(minlink, transaction):
705 722 """Remove storage of items starting at a linkrev.
706 723
707 724 This uses ``getstrippoint()`` to determine the first node to remove.
708 725 Then it effectively truncates storage for all revisions after that.
709 726
710 727 TODO this is highly revlog centric and should be abstracted into a
711 728 higher-level deletion API.
712 729 """
713 730
714 731 class ifilestorage(ifileindex, ifiledata, ifilemutation):
715 732 """Complete storage interface for a single tracked file."""
716 733
717 734 version = interfaceutil.Attribute(
718 735 """Version number of storage.
719 736
720 737 TODO this feels revlog centric and could likely be removed.
721 738 """)
722 739
723 740 _generaldelta = interfaceutil.Attribute(
724 741 """Whether deltas can be against any parent revision.
725 742
726 743 TODO this is used by changegroup code and it could probably be
727 744 folded into another API.
728 745 """)
729 746
730 747 def files():
731 748 """Obtain paths that are backing storage for this file.
732 749
733 750 TODO this is used heavily by verify code and there should probably
734 751 be a better API for that.
735 752 """
736 753
737 754 def checksize():
738 755 """Obtain the expected sizes of backing files.
739 756
740 757 TODO this is used by verify and it should not be part of the interface.
741 758 """
742 759
743 760 class idirs(interfaceutil.Interface):
744 761 """Interface representing a collection of directories from paths.
745 762
746 763 This interface is essentially a derived data structure representing
747 764 directories from a collection of paths.
748 765 """
749 766
750 767 def addpath(path):
751 768 """Add a path to the collection.
752 769
753 770 All directories in the path will be added to the collection.
754 771 """
755 772
756 773 def delpath(path):
757 774 """Remove a path from the collection.
758 775
759 776 If the removal was the last path in a particular directory, the
760 777 directory is removed from the collection.
761 778 """
762 779
763 780 def __iter__():
764 781 """Iterate over the directories in this collection of paths."""
765 782
766 783 def __contains__(path):
767 784 """Whether a specific directory is in this collection."""
768 785
769 786 class imanifestdict(interfaceutil.Interface):
770 787 """Interface representing a manifest data structure.
771 788
772 789 A manifest is effectively a dict mapping paths to entries. Each entry
773 790 consists of a binary node and extra flags affecting that entry.
774 791 """
775 792
776 793 def __getitem__(path):
777 794 """Returns the binary node value for a path in the manifest.
778 795
779 796 Raises ``KeyError`` if the path does not exist in the manifest.
780 797
781 798 Equivalent to ``self.find(path)[0]``.
782 799 """
783 800
784 801 def find(path):
785 802 """Returns the entry for a path in the manifest.
786 803
787 804 Returns a 2-tuple of (node, flags).
788 805
789 806 Raises ``KeyError`` if the path does not exist in the manifest.
790 807 """
791 808
792 809 def __len__():
793 810 """Return the number of entries in the manifest."""
794 811
795 812 def __nonzero__():
796 813 """Returns True if the manifest has entries, False otherwise."""
797 814
798 815 __bool__ = __nonzero__
799 816
800 817 def __setitem__(path, node):
801 818 """Define the node value for a path in the manifest.
802 819
803 820 If the path is already in the manifest, its flags will be copied to
804 821 the new entry.
805 822 """
806 823
807 824 def __contains__(path):
808 825 """Whether a path exists in the manifest."""
809 826
810 827 def __delitem__(path):
811 828 """Remove a path from the manifest.
812 829
813 830 Raises ``KeyError`` if the path is not in the manifest.
814 831 """
815 832
816 833 def __iter__():
817 834 """Iterate over paths in the manifest."""
818 835
819 836 def iterkeys():
820 837 """Iterate over paths in the manifest."""
821 838
822 839 def keys():
823 840 """Obtain a list of paths in the manifest."""
824 841
825 842 def filesnotin(other, match=None):
826 843 """Obtain the set of paths in this manifest but not in another.
827 844
828 845 ``match`` is an optional matcher function to be applied to both
829 846 manifests.
830 847
831 848 Returns a set of paths.
832 849 """
833 850
834 851 def dirs():
835 852 """Returns an object implementing the ``idirs`` interface."""
836 853
837 854 def hasdir(dir):
838 855 """Returns a bool indicating if a directory is in this manifest."""
839 856
840 857 def matches(match):
841 858 """Generate a new manifest filtered through a matcher.
842 859
843 860 Returns an object conforming to the ``imanifestdict`` interface.
844 861 """
845 862
846 863 def walk(match):
847 864 """Generator of paths in manifest satisfying a matcher.
848 865
849 866 This is equivalent to ``self.matches(match).iterkeys()`` except a new
850 867 manifest object is not created.
851 868
852 869 If the matcher has explicit files listed and they don't exist in
853 870 the manifest, ``match.bad()`` is called for each missing file.
854 871 """
855 872
856 873 def diff(other, match=None, clean=False):
857 874 """Find differences between this manifest and another.
858 875
859 876 This manifest is compared to ``other``.
860 877
861 878 If ``match`` is provided, the two manifests are filtered against this
862 879 matcher and only entries satisfying the matcher are compared.
863 880
864 881 If ``clean`` is True, unchanged files are included in the returned
865 882 object.
866 883
867 884 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
868 885 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
869 886 represents the node and flags for this manifest and ``(node2, flag2)``
870 887 are the same for the other manifest.
871 888 """
872 889
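A made-up entry from the returned dict, for a path whose content and executable flag both changed::

    node1, node2 = b'\xaa' * 20, b'\xbb' * 20     # placeholder nodes
    diffresult = {
        'bin/tool': ((node1, ''), (node2, 'x')),  # this manifest vs. other
    }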
873 890 def setflag(path, flag):
874 891 """Set the flag value for a given path.
875 892
876 893 Raises ``KeyError`` if the path is not already in the manifest.
877 894 """
878 895
879 896 def get(path, default=None):
880 897 """Obtain the node value for a path or a default value if missing."""
881 898
882 899 def flags(path, default=''):
883 900 """Return the flags value for a path or a default value if missing."""
884 901
885 902 def copy():
886 903 """Return a copy of this manifest."""
887 904
888 905 def items():
889 906 """Returns an iterable of (path, node) for items in this manifest."""
890 907
891 908 def iteritems():
892 909 """Identical to items()."""
893 910
894 911 def iterentries():
895 912 """Returns an iterable of (path, node, flags) for this manifest.
896 913
897 914 Similar to ``iteritems()`` except items are a 3-tuple and include
898 915 flags.
899 916 """
900 917
901 918 def text():
902 919 """Obtain the raw data representation for this manifest.
903 920
904 921 Result is used to create a manifest revision.
905 922 """
906 923
907 924 def fastdelta(base, changes):
908 925 """Obtain a delta between this manifest and another given changes.
909 926
910 927 ``base`` is the raw data representation for another manifest.
911 928
912 929 ``changes`` is an iterable of ``(path, to_delete)``.
913 930
914 931 Returns a 2-tuple containing ``bytearray(self.text())`` and the
915 932 delta between ``base`` and this manifest.
916 933 """
917 934
918 935 class imanifestrevisionbase(interfaceutil.Interface):
919 936 """Base interface representing a single revision of a manifest.
920 937
921 938 Should not be used as a primary interface: should always be inherited
922 939 as part of a larger interface.
923 940 """
924 941
925 942 def new():
926 943 """Obtain a new manifest instance.
927 944
928 945 Returns an object conforming to the ``imanifestrevisionwritable``
929 946 interface. The instance will be associated with the same
930 947 ``imanifestlog`` collection as this instance.
931 948 """
932 949
933 950 def copy():
934 951 """Obtain a copy of this manifest instance.
935 952
936 953 Returns an object conforming to the ``imanifestrevisionwritable``
937 954 interface. The instance will be associated with the same
938 955 ``imanifestlog`` collection as this instance.
939 956 """
940 957
941 958 def read():
942 959 """Obtain the parsed manifest data structure.
943 960
944 961 The returned object conforms to the ``imanifestdict`` interface.
945 962 """
946 963
947 964 class imanifestrevisionstored(imanifestrevisionbase):
948 965 """Interface representing a manifest revision committed to storage."""
949 966
950 967 def node():
951 968 """The binary node for this manifest."""
952 969
953 970 parents = interfaceutil.Attribute(
954 971 """List of binary nodes that are parents for this manifest revision."""
955 972 )
956 973
957 974 def readdelta(shallow=False):
958 975 """Obtain the manifest data structure representing changes from parent.
959 976
960 977 This manifest is compared to its 1st parent. A new manifest representing
961 978 those differences is constructed.
962 979
963 980 The returned object conforms to the ``imanifestdict`` interface.
964 981 """
965 982
966 983 def readfast(shallow=False):
967 984 """Calls either ``read()`` or ``readdelta()``.
968 985
969 986 The faster of the two options is called.
970 987 """
971 988
972 989 def find(key):
973 990 """Calls self.read().find(key)``.
974 991
975 992 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
976 993 """
977 994
978 995 class imanifestrevisionwritable(imanifestrevisionbase):
979 996 """Interface representing a manifest revision that can be committed."""
980 997
981 998 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
982 999 """Add this revision to storage.
983 1000
984 1001 Takes a transaction object, the changeset revision number it will
985 1002 be associated with, its parent nodes, and lists of added and
986 1003 removed paths.
987 1004
988 1005 If match is provided, storage can choose not to inspect or write out
989 1006 items that do not match. Storage is still required to be able to provide
990 1007 the full manifest in the future for any directories written (these
991 1008 manifests should not be "narrowed on disk").
992 1009
993 1010 Returns the binary node of the created revision.
994 1011 """
995 1012
996 1013 class imanifeststorage(interfaceutil.Interface):
997 1014 """Storage interface for manifest data."""
998 1015
999 1016 tree = interfaceutil.Attribute(
1000 1017 """The path to the directory this manifest tracks.
1001 1018
1002 1019 The empty bytestring represents the root manifest.
1003 1020 """)
1004 1021
1005 1022 index = interfaceutil.Attribute(
1006 1023 """An ``ifilerevisionssequence`` instance.""")
1007 1024
1008 1025 indexfile = interfaceutil.Attribute(
1009 1026 """Path of revlog index file.
1010 1027
1011 1028 TODO this is revlog specific and should not be exposed.
1012 1029 """)
1013 1030
1014 1031 opener = interfaceutil.Attribute(
1015 1032 """VFS opener to use to access underlying files used for storage.
1016 1033
1017 1034 TODO this is revlog specific and should not be exposed.
1018 1035 """)
1019 1036
1020 1037 version = interfaceutil.Attribute(
1021 1038 """Revlog version number.
1022 1039
1023 1040 TODO this is revlog specific and should not be exposed.
1024 1041 """)
1025 1042
1026 1043 _generaldelta = interfaceutil.Attribute(
1027 1044 """Whether generaldelta storage is being used.
1028 1045
1029 1046 TODO this is revlog specific and should not be exposed.
1030 1047 """)
1031 1048
1032 1049 fulltextcache = interfaceutil.Attribute(
1033 1050 """Dict with cache of fulltexts.
1034 1051
1035 1052 TODO this doesn't feel appropriate for the storage interface.
1036 1053 """)
1037 1054
1038 1055 def __len__():
1039 1056 """Obtain the number of revisions stored for this manifest."""
1040 1057
1041 1058 def __iter__():
1042 1059 """Iterate over revision numbers for this manifest."""
1043 1060
1044 1061 def rev(node):
1045 1062 """Obtain the revision number given a binary node.
1046 1063
1047 1064 Raises ``error.LookupError`` if the node is not known.
1048 1065 """
1049 1066
1050 1067 def node(rev):
1051 1068 """Obtain the node value given a revision number.
1052 1069
1053 1070 Raises ``error.LookupError`` if the revision is not known.
1054 1071 """
1055 1072
1056 1073 def lookup(value):
1057 1074 """Attempt to resolve a value to a node.
1058 1075
1059 1076 Value can be a binary node, hex node, revision number, or a bytes
1060 1077 that can be converted to an integer.
1061 1078
1062 1079 Raises ``error.LookupError`` if a node could not be resolved.
1063 1080
1064 1081 TODO this is only used by debug* commands and can probably be deleted
1065 1082 easily.
1066 1083 """
1067 1084
1068 1085 def parents(node):
1069 1086 """Returns a 2-tuple of parent nodes for a node.
1070 1087
1071 1088 Values will be ``nullid`` if the parent is empty.
1072 1089 """
1073 1090
1074 1091 def parentrevs(rev):
1075 1092 """Like parents() but operates on revision numbers."""
1076 1093
1077 1094 def linkrev(rev):
1078 1095 """Obtain the changeset revision number a revision is linked to."""
1079 1096
1080 1097 def revision(node, _df=None, raw=False):
1081 1098 """Obtain fulltext data for a node."""
1082 1099
1083 1100 def revdiff(rev1, rev2):
1084 1101 """Obtain a delta between two revision numbers.
1085 1102
1086 1103 The returned data is the result of ``bdiff.bdiff()`` on the raw
1087 1104 revision data.
1088 1105 """
1089 1106
1090 1107 def cmp(node, fulltext):
1091 1108 """Compare fulltext to another revision.
1092 1109
1093 1110 Returns True if the fulltext is different from what is stored.
1094 1111 """
1095 1112
1096 1113 def emitrevisiondeltas(requests):
1097 1114 """Produce ``irevisiondelta`` from ``irevisiondeltarequest``s.
1098 1115
1099 1116 See the documentation for ``ifiledata`` for more.
1100 1117 """
1101 1118
1102 1119 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
1103 1120 """Process a series of deltas for storage.
1104 1121
1105 1122 See the documentation in ``ifilemutation`` for more.
1106 1123 """
1107 1124
1108 1125 def getstrippoint(minlink):
1109 1126 """Find minimum revision that must be stripped to strip a linkrev.
1110 1127
1111 1128 See the documentation in ``ifilemutation`` for more.
1112 1129 """
1113 1130
1114 1131 def strip(minlink, transaction):
1115 1132 """Remove storage of items starting at a linkrev.
1116 1133
1117 1134 See the documentation in ``ifilemutation`` for more.
1118 1135 """
1119 1136
1120 1137 def checksize():
1121 1138 """Obtain the expected sizes of backing files.
1122 1139
1123 1140 TODO this is used by verify and it should not be part of the interface.
1124 1141 """
1125 1142
1126 1143 def files():
1127 1144 """Obtain paths that are backing storage for this manifest.
1128 1145
1129 1146 TODO this is used by verify and there should probably be a better API
1130 1147 for this functionality.
1131 1148 """
1132 1149
1133 1150 def deltaparent(rev):
1134 1151 """Obtain the revision that a revision is delta'd against.
1135 1152
1136 1153 TODO delta encoding is an implementation detail of storage and should
1137 1154 not be exposed to the storage interface.
1138 1155 """
1139 1156
1140 1157 def clone(tr, dest, **kwargs):
1141 1158 """Clone this instance to another."""
1142 1159
1143 1160 def clearcaches(clear_persisted_data=False):
1144 1161 """Clear any caches associated with this instance."""
1145 1162
1146 1163 def dirlog(d):
1147 1164 """Obtain a manifest storage instance for a tree."""
1148 1165
1149 1166 def add(m, transaction, link, p1, p2, added, removed, readtree=None,
1150 1167 match=None):
1151 1168 """Add a revision to storage.
1152 1169
1153 1170 ``m`` is an object conforming to ``imanifestdict``.
1154 1171
1155 1172 ``link`` is the linkrev revision number.
1156 1173
1157 1174 ``p1`` and ``p2`` are the binary nodes of the parent manifests.
1158 1175
1159 1176 ``added`` and ``removed`` are iterables of added and removed paths,
1160 1177 respectively.
1161 1178
1162 1179 ``readtree`` is a function that can be used to read the child tree(s)
1163 1180 when recursively writing the full tree structure when using
1164 1181 treemanifests.
1165 1182
1166 1183 ``match`` is a matcher that can be used to hint to storage that not all
1167 1184 paths must be inspected; this is an optimization and can be safely
1168 1185 ignored. Note that the storage must still be able to reproduce a full
1169 1186 manifest including files that did not match.
1170 1187 """
1171 1188
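# Hedged sketch of a call honoring the ``add()`` signature above. All names
# here are hypothetical, not from this source: ``mstore`` implements
# imanifeststorage, ``m`` conforms to imanifestdict, ``tr`` is an open
# transaction, and the parent values are binary manifest nodes.
#
#   newnode = mstore.add(m, tr, linkrev, p1node, p2node,
#                        added=[b'a.txt'], removed=[],
#                        readtree=None, match=None)
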
1172 1189 class imanifestlog(interfaceutil.Interface):
1173 1190 """Interface representing a collection of manifest snapshots.
1174 1191
1175 1192 Represents the root manifest in a repository.
1176 1193
1177 1194 Also serves as a means to access nested tree manifests and to cache
1178 1195 tree manifests.
1179 1196 """
1180 1197
1181 1198 def __getitem__(node):
1182 1199 """Obtain a manifest instance for a given binary node.
1183 1200
1184 1201 Equivalent to calling ``self.get('', node)``.
1185 1202
1186 1203 The returned object conforms to the ``imanifestrevisionstored``
1187 1204 interface.
1188 1205 """
1189 1206
1190 1207 def get(tree, node, verify=True):
1191 1208 """Retrieve the manifest instance for a given directory and binary node.
1192 1209
1193 1210 ``node`` always refers to the node of the root manifest (which will be
1194 1211 the only manifest if flat manifests are being used).
1195 1212
1196 1213 If ``tree`` is the empty string, the root manifest is returned.
1197 1214 Otherwise the manifest for the specified directory will be returned
1198 1215 (requires tree manifests).
1199 1216
1200 1217 If ``verify`` is True, ``LookupError`` is raised if the node is not
1201 1218 known.
1202 1219
1203 1220 The returned object conforms to the ``imanifestrevisionstored``
1204 1221 interface.
1205 1222 """
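
# Hypothetical usage sketch (``ml`` implements imanifestlog; ``node`` and
# the b'dir/' path are illustrative, not from this source):
#
#   rootmf = ml[node]              # equivalent to ml.get(b'', node)
#   submf = ml.get(b'dir/', node)  # tree manifest for one subdirectory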
1206 1223
1207 1224 def getstorage(tree):
1208 1225 """Retrieve an interface to storage for a particular tree.
1209 1226
1210 1227 If ``tree`` is the empty bytestring, storage for the root manifest will
1211 1228 be returned. Otherwise storage for a tree manifest is returned.
1212 1229
1213 1230 TODO formalize interface for returned object.
1214 1231 """
1215 1232
1216 1233 def clearcaches():
1217 1234 """Clear caches associated with this collection."""
1218 1235
1219 1236 def rev(node):
1220 1237 """Obtain the revision number for a binary node.
1221 1238
1222 1239 Raises ``error.LookupError`` if the node is not known.
1223 1240 """
1224 1241
1225 1242 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1226 1243 """Local repository sub-interface providing access to tracked file storage.
1227 1244
1228 1245 This interface defines how a repository accesses storage for a single
1229 1246 tracked file path.
1230 1247 """
1231 1248
1232 1249 def file(f):
1233 1250 """Obtain a filelog for a tracked path.
1234 1251
1235 1252 The returned type conforms to the ``ifilestorage`` interface.
1236 1253 """
1237 1254
1238 1255 class ilocalrepositorymain(interfaceutil.Interface):
1239 1256 """Main interface for local repositories.
1240 1257
1241 1258 This currently captures the reality of things - not how things should be.
1242 1259 """
1243 1260
1244 1261 supportedformats = interfaceutil.Attribute(
1245 1262 """Set of requirements that apply to stream clone.
1246 1263
1247 1264 This is actually a class attribute and is shared among all instances.
1248 1265 """)
1249 1266
1250 1267 supported = interfaceutil.Attribute(
1251 1268 """Set of requirements that this repo is capable of opening.""")
1252 1269
1253 1270 requirements = interfaceutil.Attribute(
1254 1271 """Set of requirements this repo uses.""")
1255 1272
1256 1273 filtername = interfaceutil.Attribute(
1257 1274 """Name of the repoview that is active on this repo.""")
1258 1275
1259 1276 wvfs = interfaceutil.Attribute(
1260 1277 """VFS used to access the working directory.""")
1261 1278
1262 1279 vfs = interfaceutil.Attribute(
1263 1280 """VFS rooted at the .hg directory.
1264 1281
1265 1282 Used to access repository data not in the store.
1266 1283 """)
1267 1284
1268 1285 svfs = interfaceutil.Attribute(
1269 1286 """VFS rooted at the store.
1270 1287
1271 1288 Used to access repository data in the store. Typically .hg/store.
1272 1289 But can point elsewhere if the store is shared.
1273 1290 """)
1274 1291
1275 1292 root = interfaceutil.Attribute(
1276 1293 """Path to the root of the working directory.""")
1277 1294
1278 1295 path = interfaceutil.Attribute(
1279 1296 """Path to the .hg directory.""")
1280 1297
1281 1298 origroot = interfaceutil.Attribute(
1282 1299 """The filesystem path that was used to construct the repo.""")
1283 1300
1284 1301 auditor = interfaceutil.Attribute(
1285 1302 """A pathauditor for the working directory.
1286 1303
1287 1304 This checks if a path refers to a nested repository.
1288 1305
1289 1306 Operates on the filesystem.
1290 1307 """)
1291 1308
1292 1309 nofsauditor = interfaceutil.Attribute(
1293 1310 """A pathauditor for the working directory.
1294 1311
1295 1312 This is like ``auditor`` except it doesn't do filesystem checks.
1296 1313 """)
1297 1314
1298 1315 baseui = interfaceutil.Attribute(
1299 1316 """Original ui instance passed into constructor.""")
1300 1317
1301 1318 ui = interfaceutil.Attribute(
1302 1319 """Main ui instance for this instance.""")
1303 1320
1304 1321 sharedpath = interfaceutil.Attribute(
1305 1322 """Path to the .hg directory of the repo this repo was shared from.""")
1306 1323
1307 1324 store = interfaceutil.Attribute(
1308 1325 """A store instance.""")
1309 1326
1310 1327 spath = interfaceutil.Attribute(
1311 1328 """Path to the store.""")
1312 1329
1313 1330 sjoin = interfaceutil.Attribute(
1314 1331 """Alias to self.store.join.""")
1315 1332
1316 1333 cachevfs = interfaceutil.Attribute(
1317 1334 """A VFS used to access the cache directory.
1318 1335
1319 1336 Typically .hg/cache.
1320 1337 """)
1321 1338
1322 1339 filteredrevcache = interfaceutil.Attribute(
1323 1340 """Holds sets of revisions to be filtered.""")
1324 1341
1325 1342 names = interfaceutil.Attribute(
1326 1343 """A ``namespaces`` instance.""")
1327 1344
1328 1345 def close():
1329 1346 """Close the handle on this repository."""
1330 1347
1331 1348 def peer():
1332 1349 """Obtain an object conforming to the ``peer`` interface."""
1333 1350
1334 1351 def unfiltered():
1335 1352 """Obtain an unfiltered/raw view of this repo."""
1336 1353
1337 1354 def filtered(name, visibilityexceptions=None):
1338 1355 """Obtain a named view of this repository."""
1339 1356
1340 1357 obsstore = interfaceutil.Attribute(
1341 1358 """A store of obsolescence data.""")
1342 1359
1343 1360 changelog = interfaceutil.Attribute(
1344 1361 """A handle on the changelog revlog.""")
1345 1362
1346 1363 manifestlog = interfaceutil.Attribute(
1347 1364 """An instance conforming to the ``imanifestlog`` interface.
1348 1365
1349 1366 Provides access to manifests for the repository.
1350 1367 """)
1351 1368
1352 1369 dirstate = interfaceutil.Attribute(
1353 1370 """Working directory state.""")
1354 1371
1355 1372 narrowpats = interfaceutil.Attribute(
1356 1373 """Matcher patterns for this repository's narrowspec.""")
1357 1374
1358 1375 def narrowmatch():
1359 1376 """Obtain a matcher for the narrowspec."""
1360 1377
1361 1378 def setnarrowpats(newincludes, newexcludes):
1362 1379 """Define the narrowspec for this repository."""
1363 1380
1364 1381 def __getitem__(changeid):
1365 1382 """Try to resolve a changectx."""
1366 1383
1367 1384 def __contains__(changeid):
1368 1385 """Whether a changeset exists."""
1369 1386
1370 1387 def __nonzero__():
1371 1388 """Always returns True."""
1372 1389 return True
1373 1390
1374 1391 __bool__ = __nonzero__
1375 1392
1376 1393 def __len__():
1377 1394 """Returns the number of changesets in the repo."""
1378 1395
1379 1396 def __iter__():
1380 1397 """Iterate over revisions in the changelog."""
1381 1398
1382 1399 def revs(expr, *args):
1383 1400 """Evaluate a revset.
1384 1401
1385 1402 Emits revisions.
1386 1403 """
1387 1404
1388 1405 def set(expr, *args):
1389 1406 """Evaluate a revset.
1390 1407
1391 1408 Emits changectx instances.
1392 1409 """
1393 1410
1394 1411 def anyrevs(specs, user=False, localalias=None):
1395 1412 """Find revisions matching one of the given revsets."""
1396 1413
1397 1414 def url():
1398 1415 """Returns a string representing the location of this repo."""
1399 1416
1400 1417 def hook(name, throw=False, **args):
1401 1418 """Call a hook."""
1402 1419
1403 1420 def tags():
1404 1421 """Return a mapping of tag to node."""
1405 1422
1406 1423 def tagtype(tagname):
1407 1424 """Return the type of a given tag."""
1408 1425
1409 1426 def tagslist():
1410 1427 """Return a list of tags ordered by revision."""
1411 1428
1412 1429 def nodetags(node):
1413 1430 """Return the tags associated with a node."""
1414 1431
1415 1432 def nodebookmarks(node):
1416 1433 """Return the list of bookmarks pointing to the specified node."""
1417 1434
1418 1435 def branchmap():
1419 1436 """Return a mapping of branch to heads in that branch."""
1420 1437
1421 1438 def revbranchcache():
1422 1439 """Obtain an object for caching branch information per revision."""
1423 1440
1424 1441 def branchtip(branch, ignoremissing=False):
1425 1442 """Return the tip node for a given branch."""
1426 1443
1427 1444 def lookup(key):
1428 1445 """Resolve the node for a revision."""
1429 1446
1430 1447 def lookupbranch(key):
1431 1448 """Look up the branch name of the given revision or branch name."""
1432 1449
1433 1450 def known(nodes):
1434 1451 """Determine whether a series of nodes is known.
1435 1452
1436 1453 Returns a list of bools.
1437 1454 """
1438 1455
1439 1456 def local():
1440 1457 """Whether the repository is local."""
1441 1458 return True
1442 1459
1443 1460 def publishing():
1444 1461 """Whether the repository is a publishing repository."""
1445 1462
1446 1463 def cancopy():
1447 1464 """Whether this repository can be copied (e.g. for a local clone)."""
1448 1465
1449 1466 def shared():
1450 1467 """The type of shared repository or None."""
1451 1468
1452 1469 def wjoin(f, *insidef):
1453 1470 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1454 1471
1455 1472 def setparents(p1, p2):
1456 1473 """Set the parent nodes of the working directory."""
1457 1474
1458 1475 def filectx(path, changeid=None, fileid=None):
1459 1476 """Obtain a filectx for the given file revision."""
1460 1477
1461 1478 def getcwd():
1462 1479 """Obtain the current working directory from the dirstate."""
1463 1480
1464 1481 def pathto(f, cwd=None):
1465 1482 """Obtain the relative path to a file."""
1466 1483
1467 1484 def adddatafilter(name, fltr):
1468 1485 """Register a named data filter for working directory reads/writes."""
1469 1486
1470 1487 def wread(filename):
1471 1488 """Read a file from wvfs, using data filters."""
1472 1489
1473 1490 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1474 1491 """Write data to a file in the wvfs, using data filters."""
1475 1492
1476 1493 def wwritedata(filename, data):
1477 1494 """Resolve data for writing to the wvfs, using data filters."""
1478 1495
1479 1496 def currenttransaction():
1480 1497 """Obtain the current transaction instance or None."""
1481 1498
1482 1499 def transaction(desc, report=None):
1483 1500 """Open a new transaction to write to the repository."""
1484 1501
1485 1502 def undofiles():
1486 1503 """Returns a list of (vfs, path) for files to undo transactions."""
1487 1504
1488 1505 def recover():
1489 1506 """Roll back an interrupted transaction."""
1490 1507
1491 1508 def rollback(dryrun=False, force=False):
1492 1509 """Undo the last transaction.
1493 1510
1494 1511 DANGEROUS.
1495 1512 """
1496 1513
1497 1514 def updatecaches(tr=None, full=False):
1498 1515 """Warm repo caches."""
1499 1516
1500 1517 def invalidatecaches():
1501 1518 """Invalidate cached data due to the repository mutating."""
1502 1519
1503 1520 def invalidatevolatilesets():
1504 1521 """Invalidate volatile computed sets (e.g. obsolescence-related)."""
1505 1522
1506 1523 def invalidatedirstate():
1507 1524 """Invalidate the dirstate."""
1508 1525
1509 1526 def invalidate(clearfilecache=False):
1510 1527 """Invalidate cached properties, optionally clearing the file cache."""
1511 1528
1512 1529 def invalidateall():
1513 1530 """Invalidate all cached data, including the dirstate."""
1514 1531
1515 1532 def lock(wait=True):
1516 1533 """Lock the repository store and return a lock instance."""
1517 1534
1518 1535 def wlock(wait=True):
1519 1536 """Lock the non-store parts of the repository."""
1520 1537
1521 1538 def currentwlock():
1522 1539 """Return the wlock if it's held or None."""
1523 1540
1524 1541 def checkcommitpatterns(wctx, vdirs, match, status, fail):
1525 1542 pass
1526 1543
1527 1544 def commit(text='', user=None, date=None, match=None, force=False,
1528 1545 editor=False, extra=None):
1529 1546 """Add a new revision to the repository."""
1530 1547
1531 1548 def commitctx(ctx, error=False):
1532 1549 """Commit a commitctx instance to the repository."""
1533 1550
1534 1551 def destroying():
1535 1552 """Inform the repository that nodes are about to be destroyed."""
1536 1553
1537 1554 def destroyed():
1538 1555 """Inform the repository that nodes have been destroyed."""
1539 1556
1540 1557 def status(node1='.', node2=None, match=None, ignored=False,
1541 1558 clean=False, unknown=False, listsubrepos=False):
1542 1559 """Convenience method to call repo[x].status()."""
1543 1560
1544 1561 def addpostdsstatus(ps):
1545 1562 """Register a callback to run after a dirstate status operation."""
1546 1563
1547 1564 def postdsstatus():
1548 1565 """Return the registered post-dirstate-status callbacks."""
1549 1566
1550 1567 def clearpostdsstatus():
1551 1568 """Remove all registered post-dirstate-status callbacks."""
1552 1569
1553 1570 def heads(start=None):
1554 1571 """Obtain list of nodes that are DAG heads."""
1555 1572
1556 1573 def branchheads(branch=None, start=None, closed=False):
1557 1574 """Return a list of head nodes for the given branch."""
1558 1575
1559 1576 def branches(nodes):
1560 1577 pass
1561 1578
1562 1579 def between(pairs):
1563 1580 pass
1564 1581
1565 1582 def checkpush(pushop):
1566 1583 pass
1567 1584
1568 1585 prepushoutgoinghooks = interfaceutil.Attribute(
1569 1586 """util.hooks instance.""")
1570 1587
1571 1588 def pushkey(namespace, key, old, new):
1572 1589 """Set a value in a pushkey namespace; return whether it succeeded."""
1573 1590
1574 1591 def listkeys(namespace):
1575 1592 """Return the mapping of keys to values in a pushkey namespace."""
1576 1593
1577 1594 def debugwireargs(one, two, three=None, four=None, five=None):
1578 1595 pass
1579 1596
1580 1597 def savecommitmessage(text):
1581 1598 """Save a commit message for later reuse and return its file path."""
1582 1599
1583 1600 class completelocalrepository(ilocalrepositorymain,
1584 1601 ilocalrepositoryfilestorage):
1585 1602 """Complete interface for a local repository."""
@@ -1,2494 +1,2583 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import hashlib
20 20 import os
21 21 import re
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullhex,
30 30 nullid,
31 31 nullrev,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVIDX_DEFAULT_FLAGS,
42 42 REVIDX_ELLIPSIS,
43 43 REVIDX_EXTSTORED,
44 44 REVIDX_FLAGS_ORDER,
45 45 REVIDX_ISCENSORED,
46 46 REVIDX_KNOWN_FLAGS,
47 47 REVIDX_RAWTEXT_CHANGING_FLAGS,
48 48 REVLOGV0,
49 49 REVLOGV1,
50 50 REVLOGV1_FLAGS,
51 51 REVLOGV2,
52 52 REVLOGV2_FLAGS,
53 53 REVLOG_DEFAULT_FLAGS,
54 54 REVLOG_DEFAULT_FORMAT,
55 55 REVLOG_DEFAULT_VERSION,
56 56 )
57 57 from .thirdparty import (
58 58 attr,
59 59 )
60 60 from . import (
61 61 ancestor,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 stringutil,
76 76 )
77 77
78 78 # blanked usage of all the names to silence pyflakes warnings
79 79 # We need these names available in the module for extensions.
80 80 REVLOGV0
81 81 REVLOGV1
82 82 REVLOGV2
83 83 FLAG_INLINE_DATA
84 84 FLAG_GENERALDELTA
85 85 REVLOG_DEFAULT_FLAGS
86 86 REVLOG_DEFAULT_FORMAT
87 87 REVLOG_DEFAULT_VERSION
88 88 REVLOGV1_FLAGS
89 89 REVLOGV2_FLAGS
90 90 REVIDX_ISCENSORED
91 91 REVIDX_ELLIPSIS
92 92 REVIDX_EXTSTORED
93 93 REVIDX_DEFAULT_FLAGS
94 94 REVIDX_FLAGS_ORDER
95 95 REVIDX_KNOWN_FLAGS
96 96 REVIDX_RAWTEXT_CHANGING_FLAGS
97 97
98 98 parsers = policy.importmod(r'parsers')
99 99
100 100 # Aliased for performance.
101 101 _zlibdecompress = zlib.decompress
102 102
103 103 # max size of revlog with inline data
104 104 _maxinline = 131072
105 105 _chunksize = 1048576
106 106
107 107 # Store flag processors (cf. 'addflagprocessor()' to register)
108 108 _flagprocessors = {
109 109 REVIDX_ISCENSORED: None,
110 110 }
111 111
112 112 # Flag processors for REVIDX_ELLIPSIS.
113 113 def ellipsisreadprocessor(rl, text):
114 114 return text, False
115 115
116 116 def ellipsiswriteprocessor(rl, text):
117 117 return text, False
118 118
119 119 def ellipsisrawprocessor(rl, text):
120 120 return False
121 121
122 122 ellipsisprocessor = (
123 123 ellipsisreadprocessor,
124 124 ellipsiswriteprocessor,
125 125 ellipsisrawprocessor,
126 126 )
127 127
128 128 _mdre = re.compile('\1\n')
129 129 def parsemeta(text):
130 130 """return (metadatadict, metadatasize)"""
131 131 # text can be buffer, so we can't use .startswith or .index
132 132 if text[:2] != '\1\n':
133 133 return None, None
134 134 s = _mdre.search(text, 2).start()
135 135 mtext = text[2:s]
136 136 meta = {}
137 137 for l in mtext.splitlines():
138 138 k, v = l.split(": ", 1)
139 139 meta[k] = v
140 140 return meta, (s + 2)
141 141
142 142 def packmeta(meta, text):
143 143 keys = sorted(meta)
144 144 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
145 145 return "\1\n%s\1\n%s" % (metatext, text)
146 146
147 147 def _censoredtext(text):
148 148 m, offs = parsemeta(text)
149 149 return m and "censored" in m
150 150
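# Standalone sketch (illustrative, not part of revlog.py) of the framing the
# helpers above implement: metadata sits between two "\x01\n" markers, with
# the revision text following the second marker.
raw = b"\x01\ncensored: gone\n\x01\nbody"
end = raw.index(b"\x01\n", 2)             # what _mdre.search() locates
assert raw[2:end] == b"censored: gone\n"  # parsemeta's metadata slice
assert raw[end + 2:] == b"body"           # text starts at offset end + 2
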
151 151 def addflagprocessor(flag, processor):
152 152 """Register a flag processor on a revision data flag.
153 153
154 154 Invariant:
155 155 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
156 156 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
157 157 - Only one flag processor can be registered on a specific flag.
158 158 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
159 159 following signatures:
160 160 - (read) f(self, rawtext) -> text, bool
161 161 - (write) f(self, text) -> rawtext, bool
162 162 - (raw) f(self, rawtext) -> bool
163 163 "text" is presented to the user. "rawtext" is stored in revlog data, not
164 164 directly visible to the user.
165 165 The boolean returned by these transforms is used to determine whether
166 166 the returned text can be used for hash integrity checking. For example,
167 167 if "write" returns False, then "text" is used to generate hash. If
168 168 "write" returns True, that basically means "rawtext" returned by "write"
169 169 should be used to generate hash. Usually, "write" and "read" return
170 170 different booleans. And "raw" returns a same boolean as "write".
171 171
172 172 Note: The 'raw' transform is used for changegroup generation and in some
173 173 debug commands. In this case the transform only indicates whether the
174 174 contents can be used for hash integrity checks.
175 175 """
176 176 if not flag & REVIDX_KNOWN_FLAGS:
177 177 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
178 178 raise error.ProgrammingError(msg)
179 179 if flag not in REVIDX_FLAGS_ORDER:
180 180 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
181 181 raise error.ProgrammingError(msg)
182 182 if flag in _flagprocessors:
183 183 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
184 184 raise error.Abort(msg)
185 185 _flagprocessors[flag] = processor
186 186
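# Illustrative sketch (not part of revlog.py) of the (read, write, raw)
# tuple shape the docstring above requires, mirroring the ellipsis
# processors; SOME_REVIDX_FLAG is a hypothetical placeholder, so the
# registration call is left commented out.
def _sketchread(rl, rawtext):
    return rawtext, False   # text shown to callers; False: hash the text
def _sketchwrite(rl, text):
    return text, False      # rawtext to store; False: hash the text
def _sketchraw(rl, rawtext):
    return False            # rawtext alone cannot verify hash integrity
# addflagprocessor(SOME_REVIDX_FLAG, (_sketchread, _sketchwrite, _sketchraw))
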
187 187 def getoffset(q):
188 188 return int(q >> 16)
189 189
190 190 def gettype(q):
191 191 return int(q & 0xFFFF)
192 192
193 193 def offset_type(offset, type):
194 194 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
195 195 raise ValueError('unknown revlog index flags')
196 196 return int(int(offset) << 16 | type)
197 197
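# Quick round-trip sketch (illustrative, not part of revlog.py);
# REVIDX_ISCENSORED is among the known flags, so offset_type() accepts it.
_packed = offset_type(4096, REVIDX_ISCENSORED)
assert getoffset(_packed) == 4096               # high 48 bits: offset
assert gettype(_packed) == REVIDX_ISCENSORED    # low 16 bits: flags
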
198 198 _nullhash = hashlib.sha1(nullid)
199 199
200 200 def hash(text, p1, p2):
201 201 """generate a hash from the given text and its parent hashes
202 202
203 203 This hash combines both the current file contents and its history
204 204 in a manner that makes it easy to distinguish nodes with the same
205 205 content in the revision graph.
206 206 """
207 207 # As of now, if one of the parent nodes is null, it is p2
208 208 if p2 == nullid:
209 209 # deep copy of a hash is faster than creating one
210 210 s = _nullhash.copy()
211 211 s.update(p1)
212 212 else:
213 213 # none of the parent nodes are nullid
214 214 if p1 < p2:
215 215 a = p1
216 216 b = p2
217 217 else:
218 218 a = p2
219 219 b = p1
220 220 s = hashlib.sha1(a)
221 221 s.update(b)
222 222 s.update(text)
223 223 return s.digest()
224 224
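# Worked sketch (illustrative, not part of revlog.py): with p2 == nullid the
# fast path and the general path agree, because nullid (all zero bytes)
# sorts before any other node.
_p1 = b"\x01" * 20
_expected = hashlib.sha1(nullid + _p1 + b"data").digest()
assert hash(b"data", _p1, nullid) == _expected
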
225 225 @attr.s(slots=True, frozen=True)
226 226 class _revisioninfo(object):
227 227 """Information about a revision that allows building its fulltext
228 228 node: expected hash of the revision
229 229 p1, p2: parent revs of the revision
230 230 btext: built text cache consisting of a one-element list
231 231 cachedelta: (baserev, uncompressed_delta) or None
232 232 flags: flags associated to the revision storage
233 233
234 234 One of btext[0] or cachedelta must be set.
235 235 """
236 236 node = attr.ib()
237 237 p1 = attr.ib()
238 238 p2 = attr.ib()
239 239 btext = attr.ib()
240 240 textlen = attr.ib()
241 241 cachedelta = attr.ib()
242 242 flags = attr.ib()
243 243
244 244 @interfaceutil.implementer(repository.irevisiondelta)
245 245 @attr.s(slots=True, frozen=True)
246 246 class revlogrevisiondelta(object):
247 247 node = attr.ib()
248 248 p1node = attr.ib()
249 249 p2node = attr.ib()
250 250 basenode = attr.ib()
251 251 linknode = attr.ib()
252 252 flags = attr.ib()
253 253 baserevisionsize = attr.ib()
254 254 revision = attr.ib()
255 255 delta = attr.ib()
256 256
257 257 # index v0:
258 258 # 4 bytes: offset
259 259 # 4 bytes: compressed length
260 260 # 4 bytes: base rev
261 261 # 4 bytes: link rev
262 262 # 20 bytes: parent 1 nodeid
263 263 # 20 bytes: parent 2 nodeid
264 264 # 20 bytes: nodeid
265 265 indexformatv0 = struct.Struct(">4l20s20s20s")
266 266 indexformatv0_pack = indexformatv0.pack
267 267 indexformatv0_unpack = indexformatv0.unpack
268 268
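# Sketch (illustrative, not part of revlog.py): packing one v0 record with
# the Struct above; all field values are made up.
_rec = indexformatv0_pack(0, 11, 0, 0, nullid, nullid, b"\x01" * 20)
assert len(_rec) == indexformatv0.size == 76    # 4 longs + 3 * 20-byte nodes
_offset, _clen, _baserev, _linkrev = indexformatv0_unpack(_rec)[:4]
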
269 269 class revlogoldindex(list):
270 270 def __getitem__(self, i):
271 271 if i == -1:
272 272 return (0, 0, 0, -1, -1, -1, -1, nullid)
273 273 return list.__getitem__(self, i)
274 274
275 275 class revlogoldio(object):
276 276 def __init__(self):
277 277 self.size = indexformatv0.size
278 278
279 279 def parseindex(self, data, inline):
280 280 s = self.size
281 281 index = []
282 282 nodemap = {nullid: nullrev}
283 283 n = off = 0
284 284 l = len(data)
285 285 while off + s <= l:
286 286 cur = data[off:off + s]
287 287 off += s
288 288 e = indexformatv0_unpack(cur)
289 289 # transform to revlogv1 format
290 290 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
291 291 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
292 292 index.append(e2)
293 293 nodemap[e[6]] = n
294 294 n += 1
295 295
296 296 return revlogoldindex(index), nodemap, None
297 297
298 298 def packentry(self, entry, node, version, rev):
299 299 if gettype(entry[0]):
300 300 raise error.RevlogError(_('index entry flags need revlog '
301 301 'version 1'))
302 302 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
303 303 node(entry[5]), node(entry[6]), entry[7])
304 304 return indexformatv0_pack(*e2)
305 305
306 306 # index ng:
307 307 # 6 bytes: offset
308 308 # 2 bytes: flags
309 309 # 4 bytes: compressed length
310 310 # 4 bytes: uncompressed length
311 311 # 4 bytes: base rev
312 312 # 4 bytes: link rev
313 313 # 4 bytes: parent 1 rev
314 314 # 4 bytes: parent 2 rev
315 315 # 32 bytes: nodeid
316 316 indexformatng = struct.Struct(">Qiiiiii20s12x")
317 317 indexformatng_pack = indexformatng.pack
318 318 versionformat = struct.Struct(">I")
319 319 versionformat_pack = versionformat.pack
320 320 versionformat_unpack = versionformat.unpack
321 321
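# Sketch (illustrative, not part of revlog.py): a v1 ("ng") entry is 64
# bytes, and its first quadword carries offset and flags together (see
# offset_type above); values are made up.
_entry = (offset_type(0, 0), 10, 12, -1, 0, -1, -1, b"\x02" * 20)
assert len(indexformatng_pack(*_entry)) == indexformatng.size == 64
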
322 322 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
323 323 # signed integer)
324 324 _maxentrysize = 0x7fffffff
325 325
326 326 class revlogio(object):
327 327 def __init__(self):
328 328 self.size = indexformatng.size
329 329
330 330 def parseindex(self, data, inline):
331 331 # call the C implementation to parse the index data
332 332 index, cache = parsers.parse_index2(data, inline)
333 333 return index, getattr(index, 'nodemap', None), cache
334 334
335 335 def packentry(self, entry, node, version, rev):
336 336 p = indexformatng_pack(*entry)
337 337 if rev == 0:
338 338 p = versionformat_pack(version) + p[4:]
339 339 return p
340 340
341 341 class revlog(object):
342 342 """
343 343 the underlying revision storage object
344 344
345 345 A revlog consists of two parts, an index and the revision data.
346 346
347 347 The index is a file with a fixed record size containing
348 348 information on each revision, including its nodeid (hash), the
349 349 nodeids of its parents, the position and offset of its data within
350 350 the data file, and the revision it's based on. Finally, each entry
351 351 contains a linkrev entry that can serve as a pointer to external
352 352 data.
353 353
354 354 The revision data itself is a linear collection of data chunks.
355 355 Each chunk represents a revision and is usually represented as a
356 356 delta against the previous chunk. To bound lookup time, runs of
357 357 deltas are limited to about 2 times the length of the original
358 358 version data. This makes retrieval of a version proportional to
359 359 its size, or O(1) relative to the number of revisions.
360 360
361 361 Both pieces of the revlog are written to in an append-only
362 362 fashion, which means we never need to rewrite a file to insert or
363 363 remove data, and can use some simple techniques to avoid the need
364 364 for locking while reading.
365 365
366 366 If checkambig, indexfile is opened with checkambig=True at
367 367 writing, to avoid file stat ambiguity.
368 368
369 369 If mmaplargeindex is True, and an mmapindexthreshold is set, the
370 370 index will be mmapped rather than read if it is larger than the
371 371 configured threshold.
372 372
373 373 If censorable is True, the revlog can have censored revisions.
374 374 """
375 375 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
376 376 mmaplargeindex=False, censorable=False):
377 377 """
378 378 create a revlog object
379 379
380 380 opener is a function that abstracts the file opening operation
381 381 and can be used to implement COW semantics or the like.
382 382 """
383 383 self.indexfile = indexfile
384 384 self.datafile = datafile or (indexfile[:-2] + ".d")
385 385 self.opener = opener
386 386 # When True, indexfile is opened with checkambig=True at writing, to
387 387 # avoid file stat ambiguity.
388 388 self._checkambig = checkambig
389 389 self._censorable = censorable
390 390 # 3-tuple of (node, rev, text) for a raw revision.
391 391 self._cache = None
392 392 # Maps rev to chain base rev.
393 393 self._chainbasecache = util.lrucachedict(100)
394 394 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
395 395 self._chunkcache = (0, '')
396 396 # How much data to read and cache into the raw revlog data cache.
397 397 self._chunkcachesize = 65536
398 398 self._maxchainlen = None
399 399 self._deltabothparents = True
400 400 self.index = []
401 401 # Mapping of partial identifiers to full nodes.
402 402 self._pcache = {}
403 403 # Mapping of revision integer to full node.
404 404 self._nodecache = {nullid: nullrev}
405 405 self._nodepos = None
406 406 self._compengine = 'zlib'
407 407 self._maxdeltachainspan = -1
408 408 self._withsparseread = False
409 409 self._sparserevlog = False
410 410 self._srdensitythreshold = 0.50
411 411 self._srmingapsize = 262144
412 412
413 413 # Make copy of flag processors so each revlog instance can support
414 414 # custom flags.
415 415 self._flagprocessors = dict(_flagprocessors)
416 416
417 417 mmapindexthreshold = None
418 418 v = REVLOG_DEFAULT_VERSION
419 419 opts = getattr(opener, 'options', None)
420 420 if opts is not None:
421 421 if 'revlogv2' in opts:
422 422 # version 2 revlogs always use generaldelta.
423 423 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
424 424 elif 'revlogv1' in opts:
425 425 if 'generaldelta' in opts:
426 426 v |= FLAG_GENERALDELTA
427 427 else:
428 428 v = 0
429 429 if 'chunkcachesize' in opts:
430 430 self._chunkcachesize = opts['chunkcachesize']
431 431 if 'maxchainlen' in opts:
432 432 self._maxchainlen = opts['maxchainlen']
433 433 if 'deltabothparents' in opts:
434 434 self._deltabothparents = opts['deltabothparents']
435 435 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
436 436 if 'compengine' in opts:
437 437 self._compengine = opts['compengine']
438 438 if 'maxdeltachainspan' in opts:
439 439 self._maxdeltachainspan = opts['maxdeltachainspan']
440 440 if mmaplargeindex and 'mmapindexthreshold' in opts:
441 441 mmapindexthreshold = opts['mmapindexthreshold']
442 442 self._sparserevlog = bool(opts.get('sparse-revlog', False))
443 443 withsparseread = bool(opts.get('with-sparse-read', False))
444 444 # sparse-revlog forces sparse-read
445 445 self._withsparseread = self._sparserevlog or withsparseread
446 446 if 'sparse-read-density-threshold' in opts:
447 447 self._srdensitythreshold = opts['sparse-read-density-threshold']
448 448 if 'sparse-read-min-gap-size' in opts:
449 449 self._srmingapsize = opts['sparse-read-min-gap-size']
450 450 if opts.get('enableellipsis'):
451 451 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
452 452
453 453 if self._chunkcachesize <= 0:
454 454 raise error.RevlogError(_('revlog chunk cache size %r is not '
455 455 'greater than 0') % self._chunkcachesize)
456 456 elif self._chunkcachesize & (self._chunkcachesize - 1):
457 457 raise error.RevlogError(_('revlog chunk cache size %r is not a '
458 458 'power of 2') % self._chunkcachesize)
459 459
460 460 indexdata = ''
461 461 self._initempty = True
462 462 try:
463 463 with self._indexfp() as f:
464 464 if (mmapindexthreshold is not None and
465 465 self.opener.fstat(f).st_size >= mmapindexthreshold):
466 466 indexdata = util.buffer(util.mmapread(f))
467 467 else:
468 468 indexdata = f.read()
469 469 if len(indexdata) > 0:
470 470 v = versionformat_unpack(indexdata[:4])[0]
471 471 self._initempty = False
472 472 except IOError as inst:
473 473 if inst.errno != errno.ENOENT:
474 474 raise
475 475
476 476 self.version = v
477 477 self._inline = v & FLAG_INLINE_DATA
478 478 self._generaldelta = v & FLAG_GENERALDELTA
479 479 flags = v & ~0xFFFF
480 480 fmt = v & 0xFFFF
481 481 if fmt == REVLOGV0:
482 482 if flags:
483 483 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
484 484 'revlog %s') %
485 485 (flags >> 16, fmt, self.indexfile))
486 486 elif fmt == REVLOGV1:
487 487 if flags & ~REVLOGV1_FLAGS:
488 488 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
489 489 'revlog %s') %
490 490 (flags >> 16, fmt, self.indexfile))
491 491 elif fmt == REVLOGV2:
492 492 if flags & ~REVLOGV2_FLAGS:
493 493 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
494 494 'revlog %s') %
495 495 (flags >> 16, fmt, self.indexfile))
496 496 else:
497 497 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
498 498 (fmt, self.indexfile))
499 499
500 500 self._storedeltachains = True
501 501
502 502 self._io = revlogio()
503 503 if self.version == REVLOGV0:
504 504 self._io = revlogoldio()
505 505 try:
506 506 d = self._io.parseindex(indexdata, self._inline)
507 507 except (ValueError, IndexError):
508 508 raise error.RevlogError(_("index %s is corrupted") %
509 509 self.indexfile)
510 510 self.index, nodemap, self._chunkcache = d
511 511 if nodemap is not None:
512 512 self.nodemap = self._nodecache = nodemap
513 513 if not self._chunkcache:
514 514 self._chunkclear()
515 515 # revnum -> (chain-length, sum-delta-length)
516 516 self._chaininfocache = {}
517 517 # revlog header -> revlog compressor
518 518 self._decompressors = {}
519 519
520 520 @util.propertycache
521 521 def _compressor(self):
522 522 return util.compengines[self._compengine].revlogcompressor()
523 523
524 524 def _indexfp(self, mode='r'):
525 525 """file object for the revlog's index file"""
526 526 args = {r'mode': mode}
527 527 if mode != 'r':
528 528 args[r'checkambig'] = self._checkambig
529 529 if mode == 'w':
530 530 args[r'atomictemp'] = True
531 531 return self.opener(self.indexfile, **args)
532 532
533 533 def _datafp(self, mode='r'):
534 534 """file object for the revlog's data file"""
535 535 return self.opener(self.datafile, mode=mode)
536 536
537 537 @contextlib.contextmanager
538 538 def _datareadfp(self, existingfp=None):
539 539 """file object suitable to read data"""
540 540 if existingfp is not None:
541 541 yield existingfp
542 542 else:
543 543 if self._inline:
544 544 func = self._indexfp
545 545 else:
546 546 func = self._datafp
547 547 with func() as fp:
548 548 yield fp
549 549
550 550 def tip(self):
551 551 return self.node(len(self.index) - 1)
552 552 def __contains__(self, rev):
553 553 return 0 <= rev < len(self)
554 554 def __len__(self):
555 555 return len(self.index)
556 556 def __iter__(self):
557 557 return iter(pycompat.xrange(len(self)))
558 558 def revs(self, start=0, stop=None):
559 559 """iterate over all rev in this revlog (from start to stop)"""
560 560 step = 1
561 561 length = len(self)
562 562 if stop is not None:
563 563 if start > stop:
564 564 step = -1
565 565 stop += step
566 566 if stop > length:
567 567 stop = length
568 568 else:
569 569 stop = length
570 570 return pycompat.xrange(start, stop, step)
571 571
572 572 @util.propertycache
573 573 def nodemap(self):
574 574 if self.index:
575 575 # populate mapping down to the initial node
576 576 node0 = self.index[0][7] # get around changelog filtering
577 577 self.rev(node0)
578 578 return self._nodecache
579 579
580 580 def hasnode(self, node):
581 581 try:
582 582 self.rev(node)
583 583 return True
584 584 except KeyError:
585 585 return False
586 586
587 587 def candelta(self, baserev, rev):
588 588 """whether two revisions (baserev, rev) can be delta-ed or not"""
589 589 # Disable delta if either rev requires a content-changing flag
590 590 # processor (ex. LFS). This is because such flag processor can alter
591 591 # the rawtext content that the delta will be based on, and two clients
592 592 # could have a same revlog node with different flags (i.e. different
593 593 # rawtext contents) and the delta could be incompatible.
594 594 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
595 595 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
596 596 return False
597 597 return True
598 598
599 599 def clearcaches(self):
600 600 self._cache = None
601 601 self._chainbasecache.clear()
602 602 self._chunkcache = (0, '')
603 603 self._pcache = {}
604 604
605 605 try:
606 606 self._nodecache.clearcaches()
607 607 except AttributeError:
608 608 self._nodecache = {nullid: nullrev}
609 609 self._nodepos = None
610 610
611 611 def rev(self, node):
612 612 try:
613 613 return self._nodecache[node]
614 614 except TypeError:
615 615 raise
616 616 except error.RevlogError:
617 617 # parsers.c radix tree lookup failed
618 618 if node == wdirid or node in wdirfilenodeids:
619 619 raise error.WdirUnsupported
620 620 raise error.LookupError(node, self.indexfile, _('no node'))
621 621 except KeyError:
622 622 # pure python cache lookup failed
623 623 n = self._nodecache
624 624 i = self.index
625 625 p = self._nodepos
626 626 if p is None:
627 627 p = len(i) - 1
628 628 else:
629 629 assert p < len(i)
630 630 for r in pycompat.xrange(p, -1, -1):
631 631 v = i[r][7]
632 632 n[v] = r
633 633 if v == node:
634 634 self._nodepos = r - 1
635 635 return r
636 636 if node == wdirid or node in wdirfilenodeids:
637 637 raise error.WdirUnsupported
638 638 raise error.LookupError(node, self.indexfile, _('no node'))
639 639
640 640 # Accessors for index entries.
641 641
642 642 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
643 643 # are flags.
644 644 def start(self, rev):
645 645 return int(self.index[rev][0] >> 16)
646 646
647 647 def flags(self, rev):
648 648 return self.index[rev][0] & 0xFFFF
649 649
650 650 def length(self, rev):
651 651 return self.index[rev][1]
652 652
653 653 def rawsize(self, rev):
654 654 """return the length of the uncompressed text for a given revision"""
655 655 l = self.index[rev][2]
656 656 if l >= 0:
657 657 return l
658 658
659 659 t = self.revision(rev, raw=True)
660 660 return len(t)
661 661
662 662 def size(self, rev):
663 663 """length of non-raw text (processed by a "read" flag processor)"""
664 664 # fast path: if no "read" flag processor could change the content,
665 665 # size is rawsize. note: ELLIPSIS is known to not change the content.
666 666 flags = self.flags(rev)
667 667 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
668 668 return self.rawsize(rev)
669 669
670 670 return len(self.revision(rev, raw=False))
671 671
672 672 def chainbase(self, rev):
673 673 base = self._chainbasecache.get(rev)
674 674 if base is not None:
675 675 return base
676 676
677 677 index = self.index
678 678 iterrev = rev
679 679 base = index[iterrev][3]
680 680 while base != iterrev:
681 681 iterrev = base
682 682 base = index[iterrev][3]
683 683
684 684 self._chainbasecache[rev] = base
685 685 return base
686 686
687 687 def linkrev(self, rev):
688 688 return self.index[rev][4]
689 689
690 690 def parentrevs(self, rev):
691 691 try:
692 692 entry = self.index[rev]
693 693 except IndexError:
694 694 if rev == wdirrev:
695 695 raise error.WdirUnsupported
696 696 raise
697 697
698 698 return entry[5], entry[6]
699 699
700 700 def node(self, rev):
701 701 try:
702 702 return self.index[rev][7]
703 703 except IndexError:
704 704 if rev == wdirrev:
705 705 raise error.WdirUnsupported
706 706 raise
707 707
708 708 # Derived from index values.
709 709
710 710 def end(self, rev):
711 711 return self.start(rev) + self.length(rev)
712 712
713 713 def parents(self, node):
714 714 i = self.index
715 715 d = i[self.rev(node)]
716 716 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
717 717
718 718 def chainlen(self, rev):
719 719 return self._chaininfo(rev)[0]
720 720
721 721 def _chaininfo(self, rev):
722 722 chaininfocache = self._chaininfocache
723 723 if rev in chaininfocache:
724 724 return chaininfocache[rev]
725 725 index = self.index
726 726 generaldelta = self._generaldelta
727 727 iterrev = rev
728 728 e = index[iterrev]
729 729 clen = 0
730 730 compresseddeltalen = 0
731 731 while iterrev != e[3]:
732 732 clen += 1
733 733 compresseddeltalen += e[1]
734 734 if generaldelta:
735 735 iterrev = e[3]
736 736 else:
737 737 iterrev -= 1
738 738 if iterrev in chaininfocache:
739 739 t = chaininfocache[iterrev]
740 740 clen += t[0]
741 741 compresseddeltalen += t[1]
742 742 break
743 743 e = index[iterrev]
744 744 else:
745 745 # Add text length of base since decompressing that also takes
746 746 # work. For cache hits the length is already included.
747 747 compresseddeltalen += e[1]
748 748 r = (clen, compresseddeltalen)
749 749 chaininfocache[rev] = r
750 750 return r
751 751
752 752 def _deltachain(self, rev, stoprev=None):
753 753 """Obtain the delta chain for a revision.
754 754
755 755 ``stoprev`` specifies a revision to stop at. If not specified, we
756 756 stop at the base of the chain.
757 757
758 758 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
759 759 revs in ascending order and ``stopped`` is a bool indicating whether
760 760 ``stoprev`` was hit.
761 761 """
762 762 # Try C implementation.
763 763 try:
764 764 return self.index.deltachain(rev, stoprev, self._generaldelta)
765 765 except AttributeError:
766 766 pass
767 767
768 768 chain = []
769 769
770 770 # Alias to prevent attribute lookup in tight loop.
771 771 index = self.index
772 772 generaldelta = self._generaldelta
773 773
774 774 iterrev = rev
775 775 e = index[iterrev]
776 776 while iterrev != e[3] and iterrev != stoprev:
777 777 chain.append(iterrev)
778 778 if generaldelta:
779 779 iterrev = e[3]
780 780 else:
781 781 iterrev -= 1
782 782 e = index[iterrev]
783 783
784 784 if iterrev == stoprev:
785 785 stopped = True
786 786 else:
787 787 chain.append(iterrev)
788 788 stopped = False
789 789
790 790 chain.reverse()
791 791 return chain, stopped
792 792
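# Toy model (illustrative, not part of revlog.py) of the pure-Python walk
# above: each entry's e[3] is its delta base, and a chain ends at a rev
# that is its own base.
_base = {0: 0, 1: 0, 2: 1}      # hypothetical rev -> delta base mapping
_rev, _chain = 2, []
while _rev != _base[_rev]:
    _chain.append(_rev)
    _rev = _base[_rev]
_chain.append(_rev)             # no stoprev hit, so the base is included
_chain.reverse()
assert _chain == [0, 1, 2]
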
793 793 def ancestors(self, revs, stoprev=0, inclusive=False):
794 794 """Generate the ancestors of 'revs' in reverse topological order.
795 795 Does not generate revs lower than stoprev.
796 796
797 797 See the documentation for ancestor.lazyancestors for more details."""
798 798
799 799 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
800 800 inclusive=inclusive)
801 801
802 802 def descendants(self, revs):
803 803 """Generate the descendants of 'revs' in revision order.
804 804
805 805 Yield a sequence of revision numbers starting with a child of
806 806 some rev in revs, i.e., each revision is *not* considered a
807 807 descendant of itself. Results are ordered by revision number (a
808 808 topological sort)."""
809 809 first = min(revs)
810 810 if first == nullrev:
811 811 for i in self:
812 812 yield i
813 813 return
814 814
815 815 seen = set(revs)
816 816 for i in self.revs(start=first + 1):
817 817 for x in self.parentrevs(i):
818 818 if x != nullrev and x in seen:
819 819 seen.add(i)
820 820 yield i
821 821 break
822 822
823 823 def findcommonmissing(self, common=None, heads=None):
824 824 """Return a tuple of the ancestors of common and the ancestors of heads
825 825 that are not ancestors of common. In revset terminology, we return the
826 826 tuple:
827 827
828 828 ::common, (::heads) - (::common)
829 829
830 830 The list is sorted by revision number, meaning it is
831 831 topologically sorted.
832 832
833 833 'heads' and 'common' are both lists of node IDs. If heads is
834 834 not supplied, uses all of the revlog's heads. If common is not
835 835 supplied, uses nullid."""
836 836 if common is None:
837 837 common = [nullid]
838 838 if heads is None:
839 839 heads = self.heads()
840 840
841 841 common = [self.rev(n) for n in common]
842 842 heads = [self.rev(n) for n in heads]
843 843
844 844 # we want the ancestors, but inclusive
845 845 class lazyset(object):
846 846 def __init__(self, lazyvalues):
847 847 self.addedvalues = set()
848 848 self.lazyvalues = lazyvalues
849 849
850 850 def __contains__(self, value):
851 851 return value in self.addedvalues or value in self.lazyvalues
852 852
853 853 def __iter__(self):
854 854 added = self.addedvalues
855 855 for r in added:
856 856 yield r
857 857 for r in self.lazyvalues:
858 858 if r not in added:
859 859 yield r
860 860
861 861 def add(self, value):
862 862 self.addedvalues.add(value)
863 863
864 864 def update(self, values):
865 865 self.addedvalues.update(values)
866 866
867 867 has = lazyset(self.ancestors(common))
868 868 has.add(nullrev)
869 869 has.update(common)
870 870
871 871 # take all ancestors from heads that aren't in has
872 872 missing = set()
873 873 visit = collections.deque(r for r in heads if r not in has)
874 874 while visit:
875 875 r = visit.popleft()
876 876 if r in missing:
877 877 continue
878 878 else:
879 879 missing.add(r)
880 880 for p in self.parentrevs(r):
881 881 if p not in has:
882 882 visit.append(p)
883 883 missing = list(missing)
884 884 missing.sort()
885 885 return has, [self.node(miss) for miss in missing]
886 886
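# Toy run (illustrative, not part of revlog.py) of the "take all ancestors
# from heads that aren't in has" walk above, on a hypothetical linear DAG
# 0 <- 1 <- 2 <- 3 with common = [1] and heads = [3]:
_parentrevs = {0: (nullrev, nullrev), 1: (0, nullrev),
               2: (1, nullrev), 3: (2, nullrev)}
_has = {nullrev, 0, 1}          # ::common, inclusive
_missing, _visit = set(), collections.deque([3])
while _visit:
    _r = _visit.popleft()
    if _r in _missing:
        continue
    _missing.add(_r)
    _visit.extend(p for p in _parentrevs[_r] if p not in _has)
assert sorted(_missing) == [2, 3]
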
887 887 def incrementalmissingrevs(self, common=None):
888 888 """Return an object that can be used to incrementally compute the
889 889 revision numbers of the ancestors of arbitrary sets that are not
890 890 ancestors of common. This is an ancestor.incrementalmissingancestors
891 891 object.
892 892
893 893 'common' is a list of revision numbers. If common is not supplied, uses
894 894 nullrev.
895 895 """
896 896 if common is None:
897 897 common = [nullrev]
898 898
899 899 return ancestor.incrementalmissingancestors(self.parentrevs, common)
900 900
901 901 def findmissingrevs(self, common=None, heads=None):
902 902 """Return the revision numbers of the ancestors of heads that
903 903 are not ancestors of common.
904 904
905 905 More specifically, return a list of revision numbers corresponding to
906 906 nodes N such that every N satisfies the following constraints:
907 907
908 908 1. N is an ancestor of some node in 'heads'
909 909 2. N is not an ancestor of any node in 'common'
910 910
911 911 The list is sorted by revision number, meaning it is
912 912 topologically sorted.
913 913
914 914 'heads' and 'common' are both lists of revision numbers. If heads is
915 915 not supplied, uses all of the revlog's heads. If common is not
916 916 supplied, uses nullid."""
917 917 if common is None:
918 918 common = [nullrev]
919 919 if heads is None:
920 920 heads = self.headrevs()
921 921
922 922 inc = self.incrementalmissingrevs(common=common)
923 923 return inc.missingancestors(heads)
924 924
925 925 def findmissing(self, common=None, heads=None):
926 926 """Return the ancestors of heads that are not ancestors of common.
927 927
928 928 More specifically, return a list of nodes N such that every N
929 929 satisfies the following constraints:
930 930
931 931 1. N is an ancestor of some node in 'heads'
932 932 2. N is not an ancestor of any node in 'common'
933 933
934 934 The list is sorted by revision number, meaning it is
935 935 topologically sorted.
936 936
937 937 'heads' and 'common' are both lists of node IDs. If heads is
938 938 not supplied, uses all of the revlog's heads. If common is not
939 939 supplied, uses nullid."""
940 940 if common is None:
941 941 common = [nullid]
942 942 if heads is None:
943 943 heads = self.heads()
944 944
945 945 common = [self.rev(n) for n in common]
946 946 heads = [self.rev(n) for n in heads]
947 947
948 948 inc = self.incrementalmissingrevs(common=common)
949 949 return [self.node(r) for r in inc.missingancestors(heads)]
950 950
951 951 def nodesbetween(self, roots=None, heads=None):
952 952 """Return a topological path from 'roots' to 'heads'.
953 953
954 954 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
955 955 topologically sorted list of all nodes N that satisfy both of
956 956 these constraints:
957 957
958 958 1. N is a descendant of some node in 'roots'
959 959 2. N is an ancestor of some node in 'heads'
960 960
961 961 Every node is considered to be both a descendant and an ancestor
962 962 of itself, so every reachable node in 'roots' and 'heads' will be
963 963 included in 'nodes'.
964 964
965 965 'outroots' is the list of reachable nodes in 'roots', i.e., the
966 966 subset of 'roots' that is returned in 'nodes'. Likewise,
967 967 'outheads' is the subset of 'heads' that is also in 'nodes'.
968 968
969 969 'roots' and 'heads' are both lists of node IDs. If 'roots' is
970 970 unspecified, uses nullid as the only root. If 'heads' is
971 971 unspecified, uses list of all of the revlog's heads."""
972 972 nonodes = ([], [], [])
973 973 if roots is not None:
974 974 roots = list(roots)
975 975 if not roots:
976 976 return nonodes
977 977 lowestrev = min([self.rev(n) for n in roots])
978 978 else:
979 979 roots = [nullid] # Everybody's a descendant of nullid
980 980 lowestrev = nullrev
981 981 if (lowestrev == nullrev) and (heads is None):
982 982 # We want _all_ the nodes!
983 983 return ([self.node(r) for r in self], [nullid], list(self.heads()))
984 984 if heads is None:
985 985 # All nodes are ancestors, so the latest ancestor is the last
986 986 # node.
987 987 highestrev = len(self) - 1
988 988 # Set ancestors to None to signal that every node is an ancestor.
989 989 ancestors = None
990 990 # Set heads to an empty dictionary for later discovery of heads
991 991 heads = {}
992 992 else:
993 993 heads = list(heads)
994 994 if not heads:
995 995 return nonodes
996 996 ancestors = set()
997 997 # Turn heads into a dictionary so we can remove 'fake' heads.
998 998 # Also, later we will be using it to filter out the heads we can't
999 999 # find from roots.
1000 1000 heads = dict.fromkeys(heads, False)
1001 1001 # Start at the top and keep marking parents until we're done.
1002 1002 nodestotag = set(heads)
1003 1003 # Remember where the top was so we can use it as a limit later.
1004 1004 highestrev = max([self.rev(n) for n in nodestotag])
1005 1005 while nodestotag:
1006 1006 # grab a node to tag
1007 1007 n = nodestotag.pop()
1008 1008 # Never tag nullid
1009 1009 if n == nullid:
1010 1010 continue
1011 1011 # A node's revision number represents its place in a
1012 1012 # topologically sorted list of nodes.
1013 1013 r = self.rev(n)
1014 1014 if r >= lowestrev:
1015 1015 if n not in ancestors:
1016 1016 # If we are possibly a descendant of one of the roots
1017 1017 # and we haven't already been marked as an ancestor
1018 1018 ancestors.add(n) # Mark as ancestor
1019 1019 # Add non-nullid parents to list of nodes to tag.
1020 1020 nodestotag.update([p for p in self.parents(n) if
1021 1021 p != nullid])
1022 1022 elif n in heads: # We've seen it before, is it a fake head?
1023 1023 # So it is, real heads should not be the ancestors of
1024 1024 # any other heads.
1025 1025 heads.pop(n)
1026 1026 if not ancestors:
1027 1027 return nonodes
1028 1028 # Now that we have our set of ancestors, we want to remove any
1029 1029 # roots that are not ancestors.
1030 1030
1031 1031 # If one of the roots was nullid, everything is included anyway.
1032 1032 if lowestrev > nullrev:
1033 1033 # But, since we weren't, let's recompute the lowest rev to not
1034 1034 # include roots that aren't ancestors.
1035 1035
1036 1036 # Filter out roots that aren't ancestors of heads
1037 1037 roots = [root for root in roots if root in ancestors]
1038 1038 # Recompute the lowest revision
1039 1039 if roots:
1040 1040 lowestrev = min([self.rev(root) for root in roots])
1041 1041 else:
1042 1042 # No more roots? Return empty list
1043 1043 return nonodes
1044 1044 else:
1045 1045 # We are descending from nullid, and don't need to care about
1046 1046 # any other roots.
1047 1047 lowestrev = nullrev
1048 1048 roots = [nullid]
1049 1049 # Transform our roots list into a set.
1050 1050 descendants = set(roots)
1051 1051 # Also, keep the original roots so we can filter out roots that aren't
1052 1052 # 'real' roots (i.e. are descended from other roots).
1053 1053 roots = descendants.copy()
1054 1054 # Our topologically sorted list of output nodes.
1055 1055 orderedout = []
1056 1056 # Don't start at nullid since we don't want nullid in our output list,
1057 1057 # and if nullid shows up in descendants, empty parents will look like
1058 1058 # they're descendants.
1059 1059 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1060 1060 n = self.node(r)
1061 1061 isdescendant = False
1062 1062 if lowestrev == nullrev: # Everybody is a descendant of nullid
1063 1063 isdescendant = True
1064 1064 elif n in descendants:
1065 1065 # n is already a descendant
1066 1066 isdescendant = True
1067 1067 # This check only needs to be done here because all the roots
1068 1068 # will start being marked as descendants before the loop.
1069 1069 if n in roots:
1070 1070 # If n was a root, check if it's a 'real' root.
1071 1071 p = tuple(self.parents(n))
1072 1072 # If any of its parents are descendants, it's not a root.
1073 1073 if (p[0] in descendants) or (p[1] in descendants):
1074 1074 roots.remove(n)
1075 1075 else:
1076 1076 p = tuple(self.parents(n))
1077 1077 # A node is a descendant if either of its parents are
1078 1078 # descendants. (We seeded the descendants set with the roots
1079 1079 # up there, remember?)
1080 1080 if (p[0] in descendants) or (p[1] in descendants):
1081 1081 descendants.add(n)
1082 1082 isdescendant = True
1083 1083 if isdescendant and ((ancestors is None) or (n in ancestors)):
1084 1084 # Only include nodes that are both descendants and ancestors.
1085 1085 orderedout.append(n)
1086 1086 if (ancestors is not None) and (n in heads):
1087 1087 # We're trying to figure out which heads are reachable
1088 1088 # from roots.
1089 1089 # Mark this head as having been reached
1090 1090 heads[n] = True
1091 1091 elif ancestors is None:
1092 1092 # Otherwise, we're trying to discover the heads.
1093 1093 # Assume this is a head because if it isn't, the next step
1094 1094 # will eventually remove it.
1095 1095 heads[n] = True
1096 1096 # But, obviously its parents aren't.
1097 1097 for p in self.parents(n):
1098 1098 heads.pop(p, None)
1099 1099 heads = [head for head, flag in heads.iteritems() if flag]
1100 1100 roots = list(roots)
1101 1101 assert orderedout
1102 1102 assert roots
1103 1103 assert heads
1104 1104 return (orderedout, roots, heads)
1105 1105
1106 1106 def headrevs(self):
1107 1107 try:
1108 1108 return self.index.headrevs()
1109 1109 except AttributeError:
1110 1110 return self._headrevs()
1111 1111
1112 1112 def computephases(self, roots):
1113 1113 return self.index.computephasesmapsets(roots)
1114 1114
1115 1115 def _headrevs(self):
1116 1116 count = len(self)
1117 1117 if not count:
1118 1118 return [nullrev]
1119 1119 # we won't iterate over filtered revs so nobody is a head at start
1120 1120 ishead = [0] * (count + 1)
1121 1121 index = self.index
1122 1122 for r in self:
1123 1123 ishead[r] = 1 # I may be a head
1124 1124 e = index[r]
1125 1125 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1126 1126 return [r for r, val in enumerate(ishead) if val]
1127 1127
1128 1128 def heads(self, start=None, stop=None):
1129 1129 """return the list of all nodes that have no children
1130 1130
1131 1131 if start is specified, only heads that are descendants of
1132 1132 start will be returned
1133 1133 if stop is specified, it will consider all the revs from stop
1134 1134 as if they had no children
1135 1135 """
1136 1136 if start is None and stop is None:
1137 1137 if not len(self):
1138 1138 return [nullid]
1139 1139 return [self.node(r) for r in self.headrevs()]
1140 1140
1141 1141 if start is None:
1142 1142 start = nullid
1143 1143 if stop is None:
1144 1144 stop = []
1145 1145 stoprevs = set([self.rev(n) for n in stop])
1146 1146 startrev = self.rev(start)
1147 1147 reachable = {startrev}
1148 1148 heads = {startrev}
1149 1149
1150 1150 parentrevs = self.parentrevs
1151 1151 for r in self.revs(start=startrev + 1):
1152 1152 for p in parentrevs(r):
1153 1153 if p in reachable:
1154 1154 if r not in stoprevs:
1155 1155 reachable.add(r)
1156 1156 heads.add(r)
1157 1157 if p in heads and p not in stoprevs:
1158 1158 heads.remove(p)
1159 1159
1160 1160 return [self.node(r) for r in heads]
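    # Usage sketch (illustrative): on a linear history 0-1-2, ``heads()``
    # returns just the tip node, and ``heads(start=self.node(0))`` walks
    # forward from rev 0 with each child displacing its parent in the
    # ``heads`` set: {0} -> {1} -> {2}, again yielding only the tip.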
1161 1161
1162 1162 def children(self, node):
1163 1163 """find the children of a given node"""
1164 1164 c = []
1165 1165 p = self.rev(node)
1166 1166 for r in self.revs(start=p + 1):
1167 1167 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1168 1168 if prevs:
1169 1169 for pr in prevs:
1170 1170 if pr == p:
1171 1171 c.append(self.node(r))
1172 1172 elif p == nullrev:
1173 1173 c.append(self.node(r))
1174 1174 return c
1175 1175
1176 1176 def commonancestorsheads(self, a, b):
1177 1177 """calculate all the heads of the common ancestors of nodes a and b"""
1178 1178 a, b = self.rev(a), self.rev(b)
1179 1179 ancs = self._commonancestorsheads(a, b)
1180 1180 return pycompat.maplist(self.node, ancs)
1181 1181
1182 1182 def _commonancestorsheads(self, *revs):
1183 1183 """calculate all the heads of the common ancestors of revs"""
1184 1184 try:
1185 1185 ancs = self.index.commonancestorsheads(*revs)
1186 1186 except (AttributeError, OverflowError): # C implementation failed
1187 1187 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1188 1188 return ancs
1189 1189
1190 1190 def isancestor(self, a, b):
1191 1191 """return True if node a is an ancestor of node b
1192 1192
1193 1193 A revision is considered an ancestor of itself."""
1194 1194 a, b = self.rev(a), self.rev(b)
1195 1195 return self.isancestorrev(a, b)
1196 1196
1197 1197 def isancestorrev(self, a, b):
1198 1198 """return True if revision a is an ancestor of revision b
1199 1199
1200 1200 A revision is considered an ancestor of itself.
1201 1201
1202 1202 The implementation of this is trivial but the use of
1203 1203 commonancestorsheads is not."""
1204 1204 if a == nullrev:
1205 1205 return True
1206 1206 elif a == b:
1207 1207 return True
1208 1208 elif a > b:
1209 1209 return False
1210 1210 return a in self._commonancestorsheads(a, b)
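    # Illustrative note on the trick above: every ancestor of ``a`` is a
    # common ancestor of ``a`` and ``b``, so ``a`` is an ancestor of ``b``
    # exactly when ``a`` survives as a *head* of the common-ancestor set.
    # E.g. on a linear history 0-1-2, _commonancestorsheads(1, 2) contains
    # rev 1, hence isancestorrev(1, 2) is True, while isancestorrev(2, 1)
    # never consults it because ``a > b`` short-circuits to False.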
1211 1211
1212 1212 def ancestor(self, a, b):
1213 1213 """calculate the "best" common ancestor of nodes a and b"""
1214 1214
1215 1215 a, b = self.rev(a), self.rev(b)
1216 1216 try:
1217 1217 ancs = self.index.ancestors(a, b)
1218 1218 except (AttributeError, OverflowError):
1219 1219 ancs = ancestor.ancestors(self.parentrevs, a, b)
1220 1220 if ancs:
1221 1221 # choose a consistent winner when there's a tie
1222 1222 return min(map(self.node, ancs))
1223 1223 return nullid
1224 1224
1225 1225 def _match(self, id):
1226 1226 if isinstance(id, int):
1227 1227 # rev
1228 1228 return self.node(id)
1229 1229 if len(id) == 20:
1230 1230 # possibly a binary node
1231 1231 # odds of a binary node being all hex in ASCII are 1 in 10**25
1232 1232 try:
1233 1233 node = id
1234 1234 self.rev(node) # quick search the index
1235 1235 return node
1236 1236 except error.LookupError:
1237 1237 pass # may be partial hex id
1238 1238 try:
1239 1239 # str(rev)
1240 1240 rev = int(id)
1241 1241 if "%d" % rev != id:
1242 1242 raise ValueError
1243 1243 if rev < 0:
1244 1244 rev = len(self) + rev
1245 1245 if rev < 0 or rev >= len(self):
1246 1246 raise ValueError
1247 1247 return self.node(rev)
1248 1248 except (ValueError, OverflowError):
1249 1249 pass
1250 1250 if len(id) == 40:
1251 1251 try:
1252 1252 # a full hex nodeid?
1253 1253 node = bin(id)
1254 1254 self.rev(node)
1255 1255 return node
1256 1256 except (TypeError, error.LookupError):
1257 1257 pass
1258 1258
1259 1259 def _partialmatch(self, id):
1260 1260         # we don't care about wdirfilenodeids as they should always be full hashes
1261 1261 maybewdir = wdirhex.startswith(id)
1262 1262 try:
1263 1263 partial = self.index.partialmatch(id)
1264 1264 if partial and self.hasnode(partial):
1265 1265 if maybewdir:
1266 1266 # single 'ff...' match in radix tree, ambiguous with wdir
1267 1267 raise error.RevlogError
1268 1268 return partial
1269 1269 if maybewdir:
1270 1270 # no 'ff...' match in radix tree, wdir identified
1271 1271 raise error.WdirUnsupported
1272 1272 return None
1273 1273 except error.RevlogError:
1274 1274 # parsers.c radix tree lookup gave multiple matches
1275 1275 # fast path: for unfiltered changelog, radix tree is accurate
1276 1276 if not getattr(self, 'filteredrevs', None):
1277 1277 raise error.AmbiguousPrefixLookupError(
1278 1278 id, self.indexfile, _('ambiguous identifier'))
1279 1279 # fall through to slow path that filters hidden revisions
1280 1280 except (AttributeError, ValueError):
1281 1281 # we are pure python, or key was too short to search radix tree
1282 1282 pass
1283 1283
1284 1284 if id in self._pcache:
1285 1285 return self._pcache[id]
1286 1286
1287 1287 if len(id) <= 40:
1288 1288 try:
1289 1289 # hex(node)[:...]
1290 1290 l = len(id) // 2 # grab an even number of digits
1291 1291 prefix = bin(id[:l * 2])
1292 1292 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1293 1293 nl = [n for n in nl if hex(n).startswith(id) and
1294 1294 self.hasnode(n)]
1295 1295 if nullhex.startswith(id):
1296 1296 nl.append(nullid)
1297 1297 if len(nl) > 0:
1298 1298 if len(nl) == 1 and not maybewdir:
1299 1299 self._pcache[id] = nl[0]
1300 1300 return nl[0]
1301 1301 raise error.AmbiguousPrefixLookupError(
1302 1302 id, self.indexfile, _('ambiguous identifier'))
1303 1303 if maybewdir:
1304 1304 raise error.WdirUnsupported
1305 1305 return None
1306 1306 except TypeError:
1307 1307 pass
1308 1308
1309 1309 def lookup(self, id):
1310 1310 """locate a node based on:
1311 1311 - revision number or str(revision number)
1312 1312 - nodeid or subset of hex nodeid
1313 1313 """
1314 1314 n = self._match(id)
1315 1315 if n is not None:
1316 1316 return n
1317 1317 n = self._partialmatch(id)
1318 1318 if n:
1319 1319 return n
1320 1320
1321 1321 raise error.LookupError(id, self.indexfile, _('no match found'))
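    # Usage sketch (illustrative; ``rl`` stands for any revlog, ``node``
    # for a known nodeid): all of these spellings resolve to the same node
    # or raise LookupError:
    #
    #   rl.lookup(0)               # integer revision number
    #   rl.lookup(b'0')            # str(revision number)
    #   rl.lookup(node)            # 20-byte binary nodeid
    #   rl.lookup(hex(node))       # full 40-digit hex nodeid
    #   rl.lookup(hex(node)[:6])   # unambiguous hex prefix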
1322 1322
1323 1323 def shortest(self, node, minlength=1):
1324 1324 """Find the shortest unambiguous prefix that matches node."""
1325 1325 def isvalid(prefix):
1326 1326 try:
1327 1327 node = self._partialmatch(prefix)
1328 1328 except error.RevlogError:
1329 1329 return False
1330 1330 except error.WdirUnsupported:
1331 1331 # single 'ff...' match
1332 1332 return True
1333 1333 if node is None:
1334 1334 raise error.LookupError(node, self.indexfile, _('no node'))
1335 1335 return True
1336 1336
1337 1337 def maybewdir(prefix):
1338 1338 return all(c == 'f' for c in prefix)
1339 1339
1340 1340 hexnode = hex(node)
1341 1341
1342 1342 def disambiguate(hexnode, minlength):
1343 1343 """Disambiguate against wdirid."""
1344 1344 for length in range(minlength, 41):
1345 1345 prefix = hexnode[:length]
1346 1346 if not maybewdir(prefix):
1347 1347 return prefix
1348 1348
1349 1349 if not getattr(self, 'filteredrevs', None):
1350 1350 try:
1351 1351 length = max(self.index.shortest(node), minlength)
1352 1352 return disambiguate(hexnode, length)
1353 1353 except error.RevlogError:
1354 1354 if node != wdirid:
1355 1355 raise error.LookupError(node, self.indexfile, _('no node'))
1356 1356 except AttributeError:
1357 1357 # Fall through to pure code
1358 1358 pass
1359 1359
1360 1360 if node == wdirid:
1361 1361 for length in range(minlength, 41):
1362 1362 prefix = hexnode[:length]
1363 1363 if isvalid(prefix):
1364 1364 return prefix
1365 1365
1366 1366 for length in range(minlength, 41):
1367 1367 prefix = hexnode[:length]
1368 1368 if isvalid(prefix):
1369 1369 return disambiguate(hexnode, length)
1370 1370
1371 1371 def cmp(self, node, text):
1372 1372 """compare text with a given file revision
1373 1373
1374 1374         returns True if text is different from what is stored.
1375 1375 """
1376 1376 p1, p2 = self.parents(node)
1377 1377 return hash(text, p1, p2) != node
1378 1378
1379 1379 def _cachesegment(self, offset, data):
1380 1380 """Add a segment to the revlog cache.
1381 1381
1382 1382 Accepts an absolute offset and the data that is at that location.
1383 1383 """
1384 1384 o, d = self._chunkcache
1385 1385 # try to add to existing cache
1386 1386 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1387 1387 self._chunkcache = o, d + data
1388 1388 else:
1389 1389 self._chunkcache = offset, data
1390 1390
1391 1391 def _readsegment(self, offset, length, df=None):
1392 1392 """Load a segment of raw data from the revlog.
1393 1393
1394 1394 Accepts an absolute offset, length to read, and an optional existing
1395 1395 file handle to read from.
1396 1396
1397 1397 If an existing file handle is passed, it will be seeked and the
1398 1398 original seek position will NOT be restored.
1399 1399
1400 1400 Returns a str or buffer of raw byte data.
1401 1401 """
1402 1402 # Cache data both forward and backward around the requested
1403 1403 # data, in a fixed size window. This helps speed up operations
1404 1404 # involving reading the revlog backwards.
1405 1405 cachesize = self._chunkcachesize
1406 1406 realoffset = offset & ~(cachesize - 1)
1407 1407 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1408 1408 - realoffset)
1409 1409 with self._datareadfp(df) as df:
1410 1410 df.seek(realoffset)
1411 1411 d = df.read(reallength)
1412 1412 self._cachesegment(realoffset, d)
1413 1413 if offset != realoffset or reallength != length:
1414 1414 return util.buffer(d, offset - realoffset, length)
1415 1415 return d
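    # Worked example (illustrative, assuming the default 64KB cache
    # window): a request for offset=70000, length=100 is widened to the
    # enclosing aligned span before touching the file:
    #
    #   realoffset = 70000 & ~65535                            # 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # 65536
    #
    # so a single aligned 64KB read satisfies this request and, via the
    # cache, neighboring requests on either side.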
1416 1416
1417 1417 def _getsegment(self, offset, length, df=None):
1418 1418 """Obtain a segment of raw data from the revlog.
1419 1419
1420 1420 Accepts an absolute offset, length of bytes to obtain, and an
1421 1421 optional file handle to the already-opened revlog. If the file
1422 1422         handle is used, its original seek position will not be preserved.
1423 1423
1424 1424 Requests for data may be returned from a cache.
1425 1425
1426 1426 Returns a str or a buffer instance of raw byte data.
1427 1427 """
1428 1428 o, d = self._chunkcache
1429 1429 l = len(d)
1430 1430
1431 1431 # is it in the cache?
1432 1432 cachestart = offset - o
1433 1433 cacheend = cachestart + length
1434 1434 if cachestart >= 0 and cacheend <= l:
1435 1435 if cachestart == 0 and cacheend == l:
1436 1436 return d # avoid a copy
1437 1437 return util.buffer(d, cachestart, cacheend - cachestart)
1438 1438
1439 1439 return self._readsegment(offset, length, df=df)
1440 1440
1441 1441 def _getsegmentforrevs(self, startrev, endrev, df=None):
1442 1442 """Obtain a segment of raw data corresponding to a range of revisions.
1443 1443
1444 1444 Accepts the start and end revisions and an optional already-open
1445 1445         file handle to be used for reading. If the file handle is used, its
1446 1446 seek position will not be preserved.
1447 1447
1448 1448 Requests for data may be satisfied by a cache.
1449 1449
1450 1450 Returns a 2-tuple of (offset, data) for the requested range of
1451 1451 revisions. Offset is the integer offset from the beginning of the
1452 1452 revlog and data is a str or buffer of the raw byte data.
1453 1453
1454 1454 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1455 1455 to determine where each revision's data begins and ends.
1456 1456 """
1457 1457 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1458 1458 # (functions are expensive).
1459 1459 index = self.index
1460 1460 istart = index[startrev]
1461 1461 start = int(istart[0] >> 16)
1462 1462 if startrev == endrev:
1463 1463 end = start + istart[1]
1464 1464 else:
1465 1465 iend = index[endrev]
1466 1466 end = int(iend[0] >> 16) + iend[1]
1467 1467
1468 1468 if self._inline:
1469 1469 start += (startrev + 1) * self._io.size
1470 1470 end += (endrev + 1) * self._io.size
1471 1471 length = end - start
1472 1472
1473 1473 return start, self._getsegment(start, length, df=df)
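    # Illustrative note: for inline revlogs, index entries and data chunks
    # are interleaved in a single file, so rev r's data sits after the
    # (r + 1) index entries preceding it. E.g. with iosize=64, rev 0's
    # 10-byte chunk at logical offset 0 is physically at 0 + 1*64 = 64,
    # ending at 74 -- which is what the adjustments above compute.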
1474 1474
1475 1475 def _chunk(self, rev, df=None):
1476 1476 """Obtain a single decompressed chunk for a revision.
1477 1477
1478 1478 Accepts an integer revision and an optional already-open file handle
1479 1479 to be used for reading. If used, the seek position of the file will not
1480 1480 be preserved.
1481 1481
1482 1482 Returns a str holding uncompressed data for the requested revision.
1483 1483 """
1484 1484 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1485 1485
1486 1486 def _chunks(self, revs, df=None, targetsize=None):
1487 1487 """Obtain decompressed chunks for the specified revisions.
1488 1488
1489 1489 Accepts an iterable of numeric revisions that are assumed to be in
1490 1490 ascending order. Also accepts an optional already-open file handle
1491 1491 to be used for reading. If used, the seek position of the file will
1492 1492 not be preserved.
1493 1493
1494 1494 This function is similar to calling ``self._chunk()`` multiple times,
1495 1495 but is faster.
1496 1496
1497 1497 Returns a list with decompressed data for each requested revision.
1498 1498 """
1499 1499 if not revs:
1500 1500 return []
1501 1501 start = self.start
1502 1502 length = self.length
1503 1503 inline = self._inline
1504 1504 iosize = self._io.size
1505 1505 buffer = util.buffer
1506 1506
1507 1507 l = []
1508 1508 ladd = l.append
1509 1509
1510 1510 if not self._withsparseread:
1511 1511 slicedchunks = (revs,)
1512 1512 else:
1513 1513 slicedchunks = deltautil.slicechunk(self, revs,
1514 1514 targetsize=targetsize)
1515 1515
1516 1516 for revschunk in slicedchunks:
1517 1517 firstrev = revschunk[0]
1518 1518 # Skip trailing revisions with empty diff
1519 1519 for lastrev in revschunk[::-1]:
1520 1520 if length(lastrev) != 0:
1521 1521 break
1522 1522
1523 1523 try:
1524 1524 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1525 1525 except OverflowError:
1526 1526 # issue4215 - we can't cache a run of chunks greater than
1527 1527 # 2G on Windows
1528 1528 return [self._chunk(rev, df=df) for rev in revschunk]
1529 1529
1530 1530 decomp = self.decompress
1531 1531 for rev in revschunk:
1532 1532 chunkstart = start(rev)
1533 1533 if inline:
1534 1534 chunkstart += (rev + 1) * iosize
1535 1535 chunklength = length(rev)
1536 1536 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1537 1537
1538 1538 return l
1539 1539
1540 1540 def _chunkclear(self):
1541 1541 """Clear the raw chunk cache."""
1542 1542 self._chunkcache = (0, '')
1543 1543
1544 1544 def deltaparent(self, rev):
1545 1545 """return deltaparent of the given revision"""
1546 1546 base = self.index[rev][3]
1547 1547 if base == rev:
1548 1548 return nullrev
1549 1549 elif self._generaldelta:
1550 1550 return base
1551 1551 else:
1552 1552 return rev - 1
1553 1553
1554 1554 def issnapshot(self, rev):
1555 1555 """tells whether rev is a snapshot
1556 1556 """
1557 1557 if rev == nullrev:
1558 1558 return True
1559 1559 deltap = self.deltaparent(rev)
1560 1560 if deltap == nullrev:
1561 1561 return True
1562 1562 p1, p2 = self.parentrevs(rev)
1563 1563 if deltap in (p1, p2):
1564 1564 return False
1565 1565 return self.issnapshot(deltap)
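    # Illustrative example: if rev 5 deltas against rev 3 (not one of its
    # parents) and rev 3 deltas against nullrev, then issnapshot(3) is
    # True via the nullrev base, and issnapshot(5) recurses through
    # deltaparent(5) == 3 to report True as well. A delta taken against a
    # parent is an ordinary delta, never a snapshot.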
1566 1566
1567 1567 def snapshotdepth(self, rev):
1568 1568         """number of snapshots in the chain before this one"""
1569 1569         if not self.issnapshot(rev):
1570 1570             raise error.ProgrammingError('revision %d not a snapshot' % rev)
1571 1571 return len(self._deltachain(rev)[0]) - 1
1572 1572
1573 1573 def revdiff(self, rev1, rev2):
1574 1574 """return or calculate a delta between two revisions
1575 1575
1576 1576 The delta calculated is in binary form and is intended to be written to
1577 1577 revlog data directly. So this function needs raw revision data.
1578 1578 """
1579 1579 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1580 1580 return bytes(self._chunk(rev2))
1581 1581
1582 1582 return mdiff.textdiff(self.revision(rev1, raw=True),
1583 1583 self.revision(rev2, raw=True))
1584 1584
1585 1585 def revision(self, nodeorrev, _df=None, raw=False):
1586 1586 """return an uncompressed revision of a given node or revision
1587 1587 number.
1588 1588
1589 1589 _df - an existing file handle to read from. (internal-only)
1590 1590 raw - an optional argument specifying if the revision data is to be
1591 1591 treated as raw data when applying flag transforms. 'raw' should be set
1592 1592 to True when generating changegroups or in debug commands.
1593 1593 """
1594 1594 if isinstance(nodeorrev, int):
1595 1595 rev = nodeorrev
1596 1596 node = self.node(rev)
1597 1597 else:
1598 1598 node = nodeorrev
1599 1599 rev = None
1600 1600
1601 1601 cachedrev = None
1602 1602 flags = None
1603 1603 rawtext = None
1604 1604 if node == nullid:
1605 1605 return ""
1606 1606 if self._cache:
1607 1607 if self._cache[0] == node:
1608 1608 # _cache only stores rawtext
1609 1609 if raw:
1610 1610 return self._cache[2]
1611 1611 # duplicated, but good for perf
1612 1612 if rev is None:
1613 1613 rev = self.rev(node)
1614 1614 if flags is None:
1615 1615 flags = self.flags(rev)
1616 1616 # no extra flags set, no flag processor runs, text = rawtext
1617 1617 if flags == REVIDX_DEFAULT_FLAGS:
1618 1618 return self._cache[2]
1619 1619 # rawtext is reusable. need to run flag processor
1620 1620 rawtext = self._cache[2]
1621 1621
1622 1622 cachedrev = self._cache[1]
1623 1623
1624 1624 # look up what we need to read
1625 1625 if rawtext is None:
1626 1626 if rev is None:
1627 1627 rev = self.rev(node)
1628 1628
1629 1629 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1630 1630 if stopped:
1631 1631 rawtext = self._cache[2]
1632 1632
1633 1633 # drop cache to save memory
1634 1634 self._cache = None
1635 1635
1636 1636 targetsize = None
1637 1637 rawsize = self.index[rev][2]
1638 1638 if 0 <= rawsize:
1639 1639 targetsize = 4 * rawsize
1640 1640
1641 1641 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1642 1642 if rawtext is None:
1643 1643 rawtext = bytes(bins[0])
1644 1644 bins = bins[1:]
1645 1645
1646 1646 rawtext = mdiff.patches(rawtext, bins)
1647 1647 self._cache = (node, rev, rawtext)
1648 1648
1649 1649 if flags is None:
1650 1650 if rev is None:
1651 1651 rev = self.rev(node)
1652 1652 flags = self.flags(rev)
1653 1653
1654 1654 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1655 1655 if validatehash:
1656 1656 self.checkhash(text, node, rev=rev)
1657 1657
1658 1658 return text
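    # Usage sketch (illustrative): ``rl.revision(node)`` returns the
    # flag-processed fulltext, while ``rl.revision(node, raw=True)`` skips
    # the transforms and returns what is stored on disk -- the form wanted
    # when generating changegroups.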
1659 1659
1660 1660 def hash(self, text, p1, p2):
1661 1661 """Compute a node hash.
1662 1662
1663 1663 Available as a function so that subclasses can replace the hash
1664 1664 as needed.
1665 1665 """
1666 1666 return hash(text, p1, p2)
1667 1667
1668 1668 def _processflags(self, text, flags, operation, raw=False):
1669 1669 """Inspect revision data flags and applies transforms defined by
1670 1670 registered flag processors.
1671 1671
1672 1672 ``text`` - the revision data to process
1673 1673 ``flags`` - the revision flags
1674 1674 ``operation`` - the operation being performed (read or write)
1675 1675 ``raw`` - an optional argument describing if the raw transform should be
1676 1676 applied.
1677 1677
1678 1678 This method processes the flags in the order (or reverse order if
1679 1679 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1680 1680 flag processors registered for present flags. The order of flags defined
1681 1681 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1682 1682
1683 1683 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1684 1684 processed text and ``validatehash`` is a bool indicating whether the
1685 1685 returned text should be checked for hash integrity.
1686 1686
1687 1687 Note: If the ``raw`` argument is set, it has precedence over the
1688 1688 operation and will only update the value of ``validatehash``.
1689 1689 """
1690 1690 # fast path: no flag processors will run
1691 1691 if flags == 0:
1692 1692 return text, True
1693 1693         if operation not in ('read', 'write'):
1694 1694 raise error.ProgrammingError(_("invalid '%s' operation") %
1695 1695 operation)
1696 1696 # Check all flags are known.
1697 1697 if flags & ~REVIDX_KNOWN_FLAGS:
1698 1698 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1699 1699 (flags & ~REVIDX_KNOWN_FLAGS))
1700 1700 validatehash = True
1701 1701 # Depending on the operation (read or write), the order might be
1702 1702 # reversed due to non-commutative transforms.
1703 1703 orderedflags = REVIDX_FLAGS_ORDER
1704 1704 if operation == 'write':
1705 1705 orderedflags = reversed(orderedflags)
1706 1706
1707 1707 for flag in orderedflags:
1708 1708 # If a flagprocessor has been registered for a known flag, apply the
1709 1709 # related operation transform and update result tuple.
1710 1710 if flag & flags:
1711 1711 vhash = True
1712 1712
1713 1713 if flag not in self._flagprocessors:
1714 1714 message = _("missing processor for flag '%#x'") % (flag)
1715 1715 raise error.RevlogError(message)
1716 1716
1717 1717 processor = self._flagprocessors[flag]
1718 1718 if processor is not None:
1719 1719 readtransform, writetransform, rawtransform = processor
1720 1720
1721 1721 if raw:
1722 1722 vhash = rawtransform(self, text)
1723 1723 elif operation == 'read':
1724 1724 text, vhash = readtransform(self, text)
1725 1725 else: # write operation
1726 1726 text, vhash = writetransform(self, text)
1727 1727 validatehash = validatehash and vhash
1728 1728
1729 1729 return text, validatehash
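    # Illustrative sketch: assuming a processor registered for some flag
    # as the tuple (readfn, writefn, rawfn), a read of a revision carrying
    # that flag flows through
    #
    #   text, vhash = readfn(self, text)
    #
    # and the accumulated ``vhash`` values decide whether checkhash()
    # runs; on 'write' the flags are walked in reverse so that paired
    # transforms nest correctly.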
1730 1730
1731 1731 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1732 1732 """Check node hash integrity.
1733 1733
1734 1734 Available as a function so that subclasses can extend hash mismatch
1735 1735 behaviors as needed.
1736 1736 """
1737 1737 try:
1738 1738 if p1 is None and p2 is None:
1739 1739 p1, p2 = self.parents(node)
1740 1740 if node != self.hash(text, p1, p2):
1741 1741 revornode = rev
1742 1742 if revornode is None:
1743 1743 revornode = templatefilters.short(hex(node))
1744 1744 raise error.RevlogError(_("integrity check failed on %s:%s")
1745 1745 % (self.indexfile, pycompat.bytestr(revornode)))
1746 1746 except error.RevlogError:
1747 1747 if self._censorable and _censoredtext(text):
1748 1748 raise error.CensoredNodeError(self.indexfile, node, text)
1749 1749 raise
1750 1750
1751 1751 def _enforceinlinesize(self, tr, fp=None):
1752 1752 """Check if the revlog is too big for inline and convert if so.
1753 1753
1754 1754 This should be called after revisions are added to the revlog. If the
1755 1755 revlog has grown too large to be an inline revlog, it will convert it
1756 1756 to use multiple index and data files.
1757 1757 """
1758 1758 tiprev = len(self) - 1
1759 1759 if (not self._inline or
1760 1760 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1761 1761 return
1762 1762
1763 1763 trinfo = tr.find(self.indexfile)
1764 1764 if trinfo is None:
1765 1765 raise error.RevlogError(_("%s not found in the transaction")
1766 1766 % self.indexfile)
1767 1767
1768 1768 trindex = trinfo[2]
1769 1769 if trindex is not None:
1770 1770 dataoff = self.start(trindex)
1771 1771 else:
1772 1772 # revlog was stripped at start of transaction, use all leftover data
1773 1773 trindex = len(self) - 1
1774 1774 dataoff = self.end(tiprev)
1775 1775
1776 1776 tr.add(self.datafile, dataoff)
1777 1777
1778 1778 if fp:
1779 1779 fp.flush()
1780 1780 fp.close()
1781 1781
1782 1782 with self._datafp('w') as df:
1783 1783 for r in self:
1784 1784 df.write(self._getsegmentforrevs(r, r)[1])
1785 1785
1786 1786 with self._indexfp('w') as fp:
1787 1787 self.version &= ~FLAG_INLINE_DATA
1788 1788 self._inline = False
1789 1789 io = self._io
1790 1790 for i in self:
1791 1791 e = io.packentry(self.index[i], self.node, self.version, i)
1792 1792 fp.write(e)
1793 1793
1794 1794             # the temp file replaces the real index when we exit the context
1795 1795             # manager
1796 1796
1797 1797 tr.replace(self.indexfile, trindex * self._io.size)
1798 1798 self._chunkclear()
1799 1799
1800 1800 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1801 1801 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1802 1802 """add a revision to the log
1803 1803
1804 1804 text - the revision data to add
1805 1805 transaction - the transaction object used for rollback
1806 1806 link - the linkrev data to add
1807 1807 p1, p2 - the parent nodeids of the revision
1808 1808 cachedelta - an optional precomputed delta
1809 1809 node - nodeid of revision; typically node is not specified, and it is
1810 1810 computed by default as hash(text, p1, p2), however subclasses might
1811 1811             use a different hashing method (and override checkhash() in that case)
1812 1812 flags - the known flags to set on the revision
1813 1813 deltacomputer - an optional deltacomputer instance shared between
1814 1814 multiple calls
1815 1815 """
1816 1816 if link == nullrev:
1817 1817 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1818 1818 % self.indexfile)
1819 1819
1820 1820 if flags:
1821 1821 node = node or self.hash(text, p1, p2)
1822 1822
1823 1823 rawtext, validatehash = self._processflags(text, flags, 'write')
1824 1824
1825 1825 # If the flag processor modifies the revision data, ignore any provided
1826 1826 # cachedelta.
1827 1827 if rawtext != text:
1828 1828 cachedelta = None
1829 1829
1830 1830 if len(rawtext) > _maxentrysize:
1831 1831 raise error.RevlogError(
1832 1832 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1833 1833 % (self.indexfile, len(rawtext)))
1834 1834
1835 1835 node = node or self.hash(rawtext, p1, p2)
1836 1836 if node in self.nodemap:
1837 1837 return node
1838 1838
1839 1839 if validatehash:
1840 1840 self.checkhash(rawtext, node, p1=p1, p2=p2)
1841 1841
1842 1842 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1843 1843 flags, cachedelta=cachedelta,
1844 1844 deltacomputer=deltacomputer)
1845 1845
1846 1846 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1847 1847 cachedelta=None, deltacomputer=None):
1848 1848 """add a raw revision with known flags, node and parents
1849 1849 useful when reusing a revision not stored in this revlog (ex: received
1850 1850 over wire, or read from an external bundle).
1851 1851 """
1852 1852 dfh = None
1853 1853 if not self._inline:
1854 1854 dfh = self._datafp("a+")
1855 1855 ifh = self._indexfp("a+")
1856 1856 try:
1857 1857 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1858 1858 flags, cachedelta, ifh, dfh,
1859 1859 deltacomputer=deltacomputer)
1860 1860 finally:
1861 1861 if dfh:
1862 1862 dfh.close()
1863 1863 ifh.close()
1864 1864
1865 1865 def compress(self, data):
1866 1866 """Generate a possibly-compressed representation of data."""
1867 1867 if not data:
1868 1868 return '', data
1869 1869
1870 1870 compressed = self._compressor.compress(data)
1871 1871
1872 1872 if compressed:
1873 1873 # The revlog compressor added the header in the returned data.
1874 1874 return '', compressed
1875 1875
1876 1876 if data[0:1] == '\0':
1877 1877 return '', data
1878 1878 return 'u', data
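    # Illustrative examples (assuming the default zlib engine, which
    # declines to compress tiny inputs):
    #
    #   compress(b'')       -> ('', b'')        # empty passes through
    #   compress(b'\0abc')  -> ('', b'\0abc')   # leading '\0' self-identifies
    #   compress(b'short')  -> ('u', b'short')  # raw data needs a 'u' marker
    #
    # decompress() below routes on that first byte.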
1879 1879
1880 1880 def decompress(self, data):
1881 1881 """Decompress a revlog chunk.
1882 1882
1883 1883 The chunk is expected to begin with a header identifying the
1884 1884 format type so it can be routed to an appropriate decompressor.
1885 1885 """
1886 1886 if not data:
1887 1887 return data
1888 1888
1889 1889 # Revlogs are read much more frequently than they are written and many
1890 1890 # chunks only take microseconds to decompress, so performance is
1891 1891 # important here.
1892 1892 #
1893 1893 # We can make a few assumptions about revlogs:
1894 1894 #
1895 1895 # 1) the majority of chunks will be compressed (as opposed to inline
1896 1896 # raw data).
1897 1897         # 2) decompressing *any* data will likely be at least 10x slower than
1898 1898 # returning raw inline data.
1899 1899 # 3) we want to prioritize common and officially supported compression
1900 1900 # engines
1901 1901 #
1902 1902 # It follows that we want to optimize for "decompress compressed data
1903 1903 # when encoded with common and officially supported compression engines"
1904 1904 # case over "raw data" and "data encoded by less common or non-official
1905 1905 # compression engines." That is why we have the inline lookup first
1906 1906 # followed by the compengines lookup.
1907 1907 #
1908 1908 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1909 1909 # compressed chunks. And this matters for changelog and manifest reads.
1910 1910 t = data[0:1]
1911 1911
1912 1912 if t == 'x':
1913 1913 try:
1914 1914 return _zlibdecompress(data)
1915 1915 except zlib.error as e:
1916 1916 raise error.RevlogError(_('revlog decompress error: %s') %
1917 1917 stringutil.forcebytestr(e))
1918 1918 # '\0' is more common than 'u' so it goes first.
1919 1919 elif t == '\0':
1920 1920 return data
1921 1921 elif t == 'u':
1922 1922 return util.buffer(data, 1)
1923 1923
1924 1924 try:
1925 1925 compressor = self._decompressors[t]
1926 1926 except KeyError:
1927 1927 try:
1928 1928 engine = util.compengines.forrevlogheader(t)
1929 1929 compressor = engine.revlogcompressor()
1930 1930 self._decompressors[t] = compressor
1931 1931 except KeyError:
1932 1932 raise error.RevlogError(_('unknown compression type %r') % t)
1933 1933
1934 1934 return compressor.decompress(data)
1935 1935
1936 1936 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1937 1937 cachedelta, ifh, dfh, alwayscache=False,
1938 1938 deltacomputer=None):
1939 1939 """internal function to add revisions to the log
1940 1940
1941 1941 see addrevision for argument descriptions.
1942 1942
1943 1943 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1944 1944
1945 1945 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1946 1946 be used.
1947 1947
1948 1948 invariants:
1949 1949 - rawtext is optional (can be None); if not set, cachedelta must be set.
1950 1950 if both are set, they must correspond to each other.
1951 1951 """
1952 1952 if node == nullid:
1953 1953 raise error.RevlogError(_("%s: attempt to add null revision") %
1954 1954 self.indexfile)
1955 1955 if node == wdirid or node in wdirfilenodeids:
1956 1956 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1957 1957 self.indexfile)
1958 1958
1959 1959 if self._inline:
1960 1960 fh = ifh
1961 1961 else:
1962 1962 fh = dfh
1963 1963
1964 1964 btext = [rawtext]
1965 1965
1966 1966 curr = len(self)
1967 1967 prev = curr - 1
1968 1968 offset = self.end(prev)
1969 1969 p1r, p2r = self.rev(p1), self.rev(p2)
1970 1970
1971 1971 # full versions are inserted when the needed deltas
1972 1972 # become comparable to the uncompressed text
1973 1973 if rawtext is None:
1974 1974             # we need the rawtext size before it was changed by flag
1975 1975             # processors, i.e. the non-raw size. use revlog explicitly to
1976 1976             # avoid filelog's extra logic that might remove metadata size.
1977 1977 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1978 1978 cachedelta[1])
1979 1979 else:
1980 1980 textlen = len(rawtext)
1981 1981
1982 1982 if deltacomputer is None:
1983 1983 deltacomputer = deltautil.deltacomputer(self)
1984 1984
1985 1985 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1986 1986
1987 1987 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1988 1988
1989 1989 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1990 1990 deltainfo.base, link, p1r, p2r, node)
1991 1991 self.index.append(e)
1992 1992 self.nodemap[node] = curr
1993 1993
1994 1994 entry = self._io.packentry(e, self.node, self.version, curr)
1995 1995 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1996 1996 link, offset)
1997 1997
1998 1998 rawtext = btext[0]
1999 1999
2000 2000 if alwayscache and rawtext is None:
2001 2001 rawtext = deltacomputer.buildtext(revinfo, fh)
2002 2002
2003 2003 if type(rawtext) == bytes: # only accept immutable objects
2004 2004 self._cache = (node, curr, rawtext)
2005 2005 self._chainbasecache[curr] = deltainfo.chainbase
2006 2006 return node
2007 2007
2008 2008 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2009 2009 # Files opened in a+ mode have inconsistent behavior on various
2010 2010 # platforms. Windows requires that a file positioning call be made
2011 2011 # when the file handle transitions between reads and writes. See
2012 2012 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2013 2013 # platforms, Python or the platform itself can be buggy. Some versions
2014 2014 # of Solaris have been observed to not append at the end of the file
2015 2015 # if the file was seeked to before the end. See issue4943 for more.
2016 2016 #
2017 2017 # We work around this issue by inserting a seek() before writing.
2018 2018 # Note: This is likely not necessary on Python 3.
2019 2019 ifh.seek(0, os.SEEK_END)
2020 2020 if dfh:
2021 2021 dfh.seek(0, os.SEEK_END)
2022 2022
2023 2023 curr = len(self) - 1
2024 2024 if not self._inline:
2025 2025 transaction.add(self.datafile, offset)
2026 2026 transaction.add(self.indexfile, curr * len(entry))
2027 2027 if data[0]:
2028 2028 dfh.write(data[0])
2029 2029 dfh.write(data[1])
2030 2030 ifh.write(entry)
2031 2031 else:
2032 2032 offset += curr * self._io.size
2033 2033 transaction.add(self.indexfile, offset, curr)
2034 2034 ifh.write(entry)
2035 2035 ifh.write(data[0])
2036 2036 ifh.write(data[1])
2037 2037 self._enforceinlinesize(transaction, ifh)
2038 2038
2039 2039 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2040 2040 """
2041 2041 add a delta group
2042 2042
2043 2043 given a set of deltas, add them to the revision log. the
2044 2044 first delta is against its parent, which should be in our
2045 2045 log, the rest are against the previous delta.
2046 2046
2047 2047 If ``addrevisioncb`` is defined, it will be called with arguments of
2048 2048 this revlog and the node that was added.
2049 2049 """
2050 2050
2051 2051 nodes = []
2052 2052
2053 2053 r = len(self)
2054 2054 end = 0
2055 2055 if r:
2056 2056 end = self.end(r - 1)
2057 2057 ifh = self._indexfp("a+")
2058 2058 isize = r * self._io.size
2059 2059 if self._inline:
2060 2060 transaction.add(self.indexfile, end + isize, r)
2061 2061 dfh = None
2062 2062 else:
2063 2063 transaction.add(self.indexfile, isize, r)
2064 2064 transaction.add(self.datafile, end)
2065 2065 dfh = self._datafp("a+")
2066 2066 def flush():
2067 2067 if dfh:
2068 2068 dfh.flush()
2069 2069 ifh.flush()
2070 2070 try:
2071 2071 deltacomputer = deltautil.deltacomputer(self)
2072 2072 # loop through our set of deltas
2073 2073 for data in deltas:
2074 2074 node, p1, p2, linknode, deltabase, delta, flags = data
2075 2075 link = linkmapper(linknode)
2076 2076 flags = flags or REVIDX_DEFAULT_FLAGS
2077 2077
2078 2078 nodes.append(node)
2079 2079
2080 2080 if node in self.nodemap:
2081 2081 # this can happen if two branches make the same change
2082 2082 continue
2083 2083
2084 2084 for p in (p1, p2):
2085 2085 if p not in self.nodemap:
2086 2086 raise error.LookupError(p, self.indexfile,
2087 2087 _('unknown parent'))
2088 2088
2089 2089 if deltabase not in self.nodemap:
2090 2090 raise error.LookupError(deltabase, self.indexfile,
2091 2091 _('unknown delta base'))
2092 2092
2093 2093 baserev = self.rev(deltabase)
2094 2094
2095 2095 if baserev != nullrev and self.iscensored(baserev):
2096 2096 # if base is censored, delta must be full replacement in a
2097 2097 # single patch operation
2098 2098 hlen = struct.calcsize(">lll")
2099 2099 oldlen = self.rawsize(baserev)
2100 2100 newlen = len(delta) - hlen
2101 2101 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2102 2102 raise error.CensoredBaseError(self.indexfile,
2103 2103 self.node(baserev))
2104 2104
2105 2105 if not flags and self._peek_iscensored(baserev, delta, flush):
2106 2106 flags |= REVIDX_ISCENSORED
2107 2107
2108 2108 # We assume consumers of addrevisioncb will want to retrieve
2109 2109 # the added revision, which will require a call to
2110 2110 # revision(). revision() will fast path if there is a cache
2111 2111 # hit. So, we tell _addrevision() to always cache in this case.
2112 2112 # We're only using addgroup() in the context of changegroup
2113 2113 # generation so the revision data can always be handled as raw
2114 2114 # by the flagprocessor.
2115 2115 self._addrevision(node, None, transaction, link,
2116 2116 p1, p2, flags, (baserev, delta),
2117 2117 ifh, dfh,
2118 2118 alwayscache=bool(addrevisioncb),
2119 2119 deltacomputer=deltacomputer)
2120 2120
2121 2121 if addrevisioncb:
2122 2122 addrevisioncb(self, node)
2123 2123
2124 2124 if not dfh and not self._inline:
2125 2125 # addrevision switched from inline to conventional
2126 2126 # reopen the index
2127 2127 ifh.close()
2128 2128 dfh = self._datafp("a+")
2129 2129 ifh = self._indexfp("a+")
2130 2130 finally:
2131 2131 if dfh:
2132 2132 dfh.close()
2133 2133 ifh.close()
2134 2134
2135 2135 return nodes
2136 2136
2137 2137 def iscensored(self, rev):
2138 2138 """Check if a file revision is censored."""
2139 2139 if not self._censorable:
2140 2140 return False
2141 2141
2142 2142 return self.flags(rev) & REVIDX_ISCENSORED
2143 2143
2144 2144 def _peek_iscensored(self, baserev, delta, flush):
2145 2145 """Quickly check if a delta produces a censored revision."""
2146 2146 if not self._censorable:
2147 2147 return False
2148 2148
2149 2149 # Fragile heuristic: unless new file meta keys are added alphabetically
2150 2150 # preceding "censored", all censored revisions are prefixed by
2151 2151 # "\1\ncensored:". A delta producing such a censored revision must be a
2152 2152 # full-replacement delta, so we inspect the first and only patch in the
2153 2153 # delta for this prefix.
2154 2154 hlen = struct.calcsize(">lll")
2155 2155 if len(delta) <= hlen:
2156 2156 return False
2157 2157
2158 2158 oldlen = self.rawsize(baserev)
2159 2159 newlen = len(delta) - hlen
2160 2160 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2161 2161 return False
2162 2162
2163 2163 add = "\1\ncensored:"
2164 2164 addlen = len(add)
2165 2165 return newlen >= addlen and delta[hlen:hlen + addlen] == add
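    # Illustrative note: a full-replacement delta is a single patch
    # operation -- a 12-byte ``>lll`` header followed by the new fulltext
    # -- so a delta producing a censored revision looks like
    #
    #   delta[:12]   == mdiff.replacediffheader(oldlen, newlen)
    #   delta[12:23] == "\1\ncensored:"
    #
    # which is exactly what the check above inspects.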
2166 2166
2167 2167 def getstrippoint(self, minlink):
2168 2168 """find the minimum rev that must be stripped to strip the linkrev
2169 2169
2170 2170 Returns a tuple containing the minimum rev and a set of all revs that
2171 2171 have linkrevs that will be broken by this strip.
2172 2172 """
2173 2173 brokenrevs = set()
2174 2174 strippoint = len(self)
2175 2175
2176 2176 heads = {}
2177 2177 futurelargelinkrevs = set()
2178 2178 for head in self.headrevs():
2179 2179 headlinkrev = self.linkrev(head)
2180 2180 heads[head] = headlinkrev
2181 2181 if headlinkrev >= minlink:
2182 2182 futurelargelinkrevs.add(headlinkrev)
2183 2183
2184 2184 # This algorithm involves walking down the rev graph, starting at the
2185 2185 # heads. Since the revs are topologically sorted according to linkrev,
2186 2186 # once all head linkrevs are below the minlink, we know there are
2187 2187 # no more revs that could have a linkrev greater than minlink.
2188 2188 # So we can stop walking.
2189 2189 while futurelargelinkrevs:
2190 2190 strippoint -= 1
2191 2191 linkrev = heads.pop(strippoint)
2192 2192
2193 2193 if linkrev < minlink:
2194 2194 brokenrevs.add(strippoint)
2195 2195 else:
2196 2196 futurelargelinkrevs.remove(linkrev)
2197 2197
2198 2198 for p in self.parentrevs(strippoint):
2199 2199 if p != nullrev:
2200 2200 plinkrev = self.linkrev(p)
2201 2201 heads[p] = plinkrev
2202 2202 if plinkrev >= minlink:
2203 2203 futurelargelinkrevs.add(plinkrev)
2204 2204
2205 2205 return strippoint, brokenrevs
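    # Worked example (illustrative): for linear revs 0..3 with linkrevs
    # 0..3 and minlink=2, the walk starts at head rev 3
    # (futurelargelinkrevs == {3}), steps the strippoint down to 3 and
    # then 2, draining the set as it goes. It returns (2, set()): revs 2
    # and 3 get stripped, and no surviving rev has a broken linkrev.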
2206 2206
2207 2207 def strip(self, minlink, transaction):
2208 2208 """truncate the revlog on the first revision with a linkrev >= minlink
2209 2209
2210 2210 This function is called when we're stripping revision minlink and
2211 2211 its descendants from the repository.
2212 2212
2213 2213 We have to remove all revisions with linkrev >= minlink, because
2214 2214 the equivalent changelog revisions will be renumbered after the
2215 2215 strip.
2216 2216
2217 2217 So we truncate the revlog on the first of these revisions, and
2218 2218 trust that the caller has saved the revisions that shouldn't be
2219 2219 removed and that it'll re-add them after this truncation.
2220 2220 """
2221 2221 if len(self) == 0:
2222 2222 return
2223 2223
2224 2224 rev, _ = self.getstrippoint(minlink)
2225 2225 if rev == len(self):
2226 2226 return
2227 2227
2228 2228 # first truncate the files on disk
2229 2229 end = self.start(rev)
2230 2230 if not self._inline:
2231 2231 transaction.add(self.datafile, end)
2232 2232 end = rev * self._io.size
2233 2233 else:
2234 2234 end += rev * self._io.size
2235 2235
2236 2236 transaction.add(self.indexfile, end)
2237 2237
2238 2238 # then reset internal state in memory to forget those revisions
2239 2239 self._cache = None
2240 2240 self._chaininfocache = {}
2241 2241 self._chunkclear()
2242 2242 for x in pycompat.xrange(rev, len(self)):
2243 2243 del self.nodemap[self.node(x)]
2244 2244
2245 2245 del self.index[rev:-1]
2246 2246 self._nodepos = None
2247 2247
2248 2248 def checksize(self):
2249 2249 expected = 0
2250 2250 if len(self):
2251 2251 expected = max(0, self.end(len(self) - 1))
2252 2252
2253 2253 try:
2254 2254 with self._datafp() as f:
2255 2255 f.seek(0, 2)
2256 2256 actual = f.tell()
2257 2257 dd = actual - expected
2258 2258 except IOError as inst:
2259 2259 if inst.errno != errno.ENOENT:
2260 2260 raise
2261 2261 dd = 0
2262 2262
2263 2263 try:
2264 2264 f = self.opener(self.indexfile)
2265 2265 f.seek(0, 2)
2266 2266 actual = f.tell()
2267 2267 f.close()
2268 2268 s = self._io.size
2269 2269 i = max(0, actual // s)
2270 2270 di = actual - (i * s)
2271 2271 if self._inline:
2272 2272 databytes = 0
2273 2273 for r in self:
2274 2274 databytes += max(0, self.length(r))
2275 2275 dd = 0
2276 2276 di = actual - len(self) * s - databytes
2277 2277 except IOError as inst:
2278 2278 if inst.errno != errno.ENOENT:
2279 2279 raise
2280 2280 di = 0
2281 2281
2282 2282 return (dd, di)
2283 2283
2284 2284 def files(self):
2285 2285 res = [self.indexfile]
2286 2286 if not self._inline:
2287 2287 res.append(self.datafile)
2288 2288 return res
2289 2289
2290 2290 def emitrevisiondeltas(self, requests):
2291 2291 frev = self.rev
2292 2292
2293 2293 prevrev = None
2294 2294 for request in requests:
2295 2295 node = request.node
2296 2296 rev = frev(node)
2297 2297
2298 2298 if prevrev is None:
2299 2299 prevrev = self.index[rev][5]
2300 2300
2301 2301 # Requesting a full revision.
2302 2302 if request.basenode == nullid:
2303 2303 baserev = nullrev
2304 2304 # Requesting an explicit revision.
2305 2305 elif request.basenode is not None:
2306 2306 baserev = frev(request.basenode)
2307 2307 # Allowing us to choose.
2308 2308 else:
2309 2309 p1rev, p2rev = self.parentrevs(rev)
2310 2310 deltaparentrev = self.deltaparent(rev)
2311 2311
2312 2312 # Avoid sending full revisions when delta parent is null. Pick
2313 2313 # prev in that case. It's tempting to pick p1 in this case, as
2314 2314 # p1 will be smaller in the common case. However, computing a
2315 2315 # delta against p1 may require resolving the raw text of p1,
2316 2316 # which could be expensive. The revlog caches should have prev
2317 2317 # cached, meaning less CPU for delta generation. There is
2318 2318 # likely room to add a flag and/or config option to control this
2319 2319 # behavior.
2320 2320 if deltaparentrev == nullrev and self._storedeltachains:
2321 2321 baserev = prevrev
2322 2322
2323 2323 # Revlog is configured to use full snapshot for a reason.
2324 2324 # Stick to full snapshot.
2325 2325 elif deltaparentrev == nullrev:
2326 2326 baserev = nullrev
2327 2327
2328 2328 # Pick previous when we can't be sure the base is available
2329 2329 # on consumer.
2330 2330 elif deltaparentrev not in (p1rev, p2rev, prevrev):
2331 2331 baserev = prevrev
2332 2332 else:
2333 2333 baserev = deltaparentrev
2334 2334
2335 2335 if baserev != nullrev and not self.candelta(baserev, rev):
2336 2336 baserev = nullrev
2337 2337
2338 2338 revision = None
2339 2339 delta = None
2340 2340 baserevisionsize = None
2341 2341
2342 2342 if self.iscensored(baserev) or self.iscensored(rev):
2343 2343 try:
2344 2344 revision = self.revision(node, raw=True)
2345 2345 except error.CensoredNodeError as e:
2346 2346 revision = e.tombstone
2347 2347
2348 2348 if baserev != nullrev:
2349 2349 baserevisionsize = self.rawsize(baserev)
2350 2350
2351 2351 elif baserev == nullrev:
2352 2352 revision = self.revision(node, raw=True)
2353 2353 else:
2354 2354 delta = self.revdiff(baserev, rev)
2355 2355
2356 2356 extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0
2357 2357
2358 2358 yield revlogrevisiondelta(
2359 2359 node=node,
2360 2360 p1node=request.p1node,
2361 2361 p2node=request.p2node,
2362 2362 linknode=request.linknode,
2363 2363 basenode=self.node(baserev),
2364 2364 flags=self.flags(rev) | extraflags,
2365 2365 baserevisionsize=baserevisionsize,
2366 2366 revision=revision,
2367 2367 delta=delta)
2368 2368
2369 2369 prevrev = rev
2370 2370
2371 2371 DELTAREUSEALWAYS = 'always'
2372 2372 DELTAREUSESAMEREVS = 'samerevs'
2373 2373 DELTAREUSENEVER = 'never'
2374 2374
2375 2375 DELTAREUSEFULLADD = 'fulladd'
2376 2376
2377 2377 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2378 2378
2379 2379 def clone(self, tr, destrevlog, addrevisioncb=None,
2380 2380 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2381 2381 """Copy this revlog to another, possibly with format changes.
2382 2382
2383 2383 The destination revlog will contain the same revisions and nodes.
2384 2384 However, it may not be bit-for-bit identical due to e.g. delta encoding
2385 2385 differences.
2386 2386
2387 2387         The ``deltareuse`` argument controls how deltas from the existing revlog
2388 2388 are preserved in the destination revlog. The argument can have the
2389 2389 following values:
2390 2390
2391 2391 DELTAREUSEALWAYS
2392 2392 Deltas will always be reused (if possible), even if the destination
2393 2393 revlog would not select the same revisions for the delta. This is the
2394 2394 fastest mode of operation.
2395 2395 DELTAREUSESAMEREVS
2396 2396 Deltas will be reused if the destination revlog would pick the same
2397 2397 revisions for the delta. This mode strikes a balance between speed
2398 2398 and optimization.
2399 2399 DELTAREUSENEVER
2400 2400 Deltas will never be reused. This is the slowest mode of execution.
2401 2401 This mode can be used to recompute deltas (e.g. if the diff/delta
2402 2402 algorithm changes).
2403 2403
2404 2404 Delta computation can be slow, so the choice of delta reuse policy can
2405 2405 significantly affect run time.
2406 2406
2407 2407 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2408 2408 two extremes. Deltas will be reused if they are appropriate. But if the
2409 2409 delta could choose a better revision, it will do so. This means if you
2410 2410 are converting a non-generaldelta revlog to a generaldelta revlog,
2411 2411 deltas will be recomputed if the delta's parent isn't a parent of the
2412 2412 revision.
2413 2413
2414 2414 In addition to the delta policy, the ``deltabothparents`` argument
2415 2415 controls whether to compute deltas against both parents for merges.
2416 2416         If unset, the destination revlog's current setting is kept.
2417 2417 """
2418 2418 if deltareuse not in self.DELTAREUSEALL:
2419 2419 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2420 2420
2421 2421 if len(destrevlog):
2422 2422 raise ValueError(_('destination revlog is not empty'))
2423 2423
2424 2424 if getattr(self, 'filteredrevs', None):
2425 2425 raise ValueError(_('source revlog has filtered revisions'))
2426 2426 if getattr(destrevlog, 'filteredrevs', None):
2427 2427 raise ValueError(_('destination revlog has filtered revisions'))
2428 2428
2429 2429 # lazydeltabase controls whether to reuse a cached delta, if possible.
2430 2430 oldlazydeltabase = destrevlog._lazydeltabase
2431 2431 oldamd = destrevlog._deltabothparents
2432 2432
2433 2433 try:
2434 2434 if deltareuse == self.DELTAREUSEALWAYS:
2435 2435 destrevlog._lazydeltabase = True
2436 2436 elif deltareuse == self.DELTAREUSESAMEREVS:
2437 2437 destrevlog._lazydeltabase = False
2438 2438
2439 2439 destrevlog._deltabothparents = deltabothparents or oldamd
2440 2440
2441 2441 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2442 2442 self.DELTAREUSESAMEREVS)
2443 2443
2444 2444 deltacomputer = deltautil.deltacomputer(destrevlog)
2445 2445 index = self.index
2446 2446 for rev in self:
2447 2447 entry = index[rev]
2448 2448
2449 2449 # Some classes override linkrev to take filtered revs into
2450 2450 # account. Use raw entry from index.
2451 2451 flags = entry[0] & 0xffff
2452 2452 linkrev = entry[4]
2453 2453 p1 = index[entry[5]][7]
2454 2454 p2 = index[entry[6]][7]
2455 2455 node = entry[7]
2456 2456
2457 2457 # (Possibly) reuse the delta from the revlog if allowed and
2458 2458 # the revlog chunk is a delta.
2459 2459 cachedelta = None
2460 2460 rawtext = None
2461 2461 if populatecachedelta:
2462 2462 dp = self.deltaparent(rev)
2463 2463 if dp != nullrev:
2464 2464 cachedelta = (dp, bytes(self._chunk(rev)))
2465 2465
2466 2466 if not cachedelta:
2467 2467 rawtext = self.revision(rev, raw=True)
2468 2468
2469 2469
2470 2470 if deltareuse == self.DELTAREUSEFULLADD:
2471 2471 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2472 2472 cachedelta=cachedelta,
2473 2473 node=node, flags=flags,
2474 2474 deltacomputer=deltacomputer)
2475 2475 else:
2476 2476 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2477 2477 checkambig=False)
2478 2478 dfh = None
2479 2479 if not destrevlog._inline:
2480 2480 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2481 2481 try:
2482 2482 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2483 2483 p2, flags, cachedelta, ifh, dfh,
2484 2484 deltacomputer=deltacomputer)
2485 2485 finally:
2486 2486 if dfh:
2487 2487 dfh.close()
2488 2488 ifh.close()
2489 2489
2490 2490 if addrevisioncb:
2491 2491 addrevisioncb(self, rev, node)
2492 2492 finally:
2493 2493 destrevlog._lazydeltabase = oldlazydeltabase
2494 2494 destrevlog._deltabothparents = oldamd
2495
2496 def censorrevision(self, node, tombstone=b''):
2497 if (self.version & 0xFFFF) == REVLOGV0:
2498 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2499 self.version)
2500
2501 rev = self.rev(node)
2502 tombstone = packmeta({b'censored': tombstone}, b'')
2503
2504 if len(tombstone) > self.rawsize(rev):
2505 raise error.Abort(_('censor tombstone must be no longer than '
2506 'censored data'))
2507
2508 # Using two files instead of one makes it easy to rewrite entry-by-entry
2509 idxread = self.opener(self.indexfile, 'r')
2510 idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
2511 if self.version & FLAG_INLINE_DATA:
2512 dataread, datawrite = idxread, idxwrite
2513 else:
2514 dataread = self.opener(self.datafile, 'r')
2515 datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
2516
2517 # Copy all revlog data up to the entry to be censored.
2518 offset = self.start(rev)
2519
2520 for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
2521 idxwrite.write(chunk)
2522 for chunk in util.filechunkiter(dataread, limit=offset):
2523 datawrite.write(chunk)
2524
2525 def rewriteindex(r, newoffs, newdata=None):
2526 """Rewrite the index entry with a new data offset and new data.
2527
2528 The newdata argument, if given, is a tuple of three positive
2529 integers: (new compressed, new uncompressed, added flag bits).
2530 """
2531 offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
2532 flags = gettype(offlags)
2533 if newdata:
2534 comp, uncomp, nflags = newdata
2535 flags |= nflags
2536 offlags = offset_type(newoffs, flags)
2537 e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
2538 idxwrite.write(self._io.packentry(e, None, self.version, r))
2539 idxread.seek(self._io.size, 1)
2540
2541 def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
2542 """Write the given fulltext with the given data offset.
2543
2544 Returns:
2545 The integer number of data bytes written, for tracking data
2546 offsets.
2547 """
2548 flag, compdata = self.compress(data)
2549 newcomp = len(flag) + len(compdata)
2550 rewriteindex(r, offs, (newcomp, len(data), nflags))
2551 datawrite.write(flag)
2552 datawrite.write(compdata)
2553 dataread.seek(self.length(r), 1)
2554 return newcomp
2555
2556 # Rewrite censored entry with (padded) tombstone data.
2557 pad = ' ' * (self.rawsize(rev) - len(tombstone))
2558 offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)
2559
2560 # Rewrite all following filelog revisions fixing up offsets and deltas.
2561 for srev in pycompat.xrange(rev + 1, len(self)):
2562 if rev in self.parentrevs(srev):
2563 # Immediate children of censored node must be re-added as
2564 # fulltext.
2565 try:
2566 revdata = self.revision(srev)
2567 except error.CensoredNodeError as e:
2568 revdata = e.tombstone
2569 dlen = rewrite(srev, offset, revdata)
2570 else:
2571 # Copy any other revision data verbatim after fixing up the
2572 # offset.
2573 rewriteindex(srev, offset)
2574 dlen = self.length(srev)
2575 for chunk in util.filechunkiter(dataread, limit=dlen):
2576 datawrite.write(chunk)
2577 offset += dlen
2578
2579 idxread.close()
2580 idxwrite.close()
2581 if dataread is not idxread:
2582 dataread.close()
2583 datawrite.close()