revlog: move censor logic out of censor extension...
Gregory Szorc
r39814:a6b3c4c1 default
diff --git a/hgext/censor.py b/hgext/censor.py
--- a/hgext/censor.py
+++ b/hgext/censor.py
@@ -1,187 +1,99 @@
 # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
 #
 # This extension enables removal of file content at a given revision,
 # rewriting the data/metadata of successive revisions to preserve revision log
 # integrity.
 
 """erase file content at a given revision
 
 The censor command instructs Mercurial to erase all content of a file at a given
 revision *without updating the changeset hash.* This allows existing history to
 remain valid while preventing future clones/pulls from receiving the erased
 data.
 
 Typical uses for censor are due to security or legal requirements, including::
 
  * Passwords, private keys, cryptographic material
  * Licensed data/code/libraries for which the license has expired
  * Personally Identifiable Information or other private data
 
 Censored nodes can interrupt mercurial's typical operation whenever the excised
 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
 simply fail when asked to produce censored data. Others, like ``hg verify`` and
 ``hg update``, must be capable of tolerating censored data to continue to
 function in a meaningful way. Such commands only tolerate censored file
 revisions if they are allowed by the "censor.policy=ignore" config option.
 """
 
 from __future__ import absolute_import
 
 from mercurial.i18n import _
 from mercurial.node import short
 
 from mercurial import (
     error,
-    pycompat,
     registrar,
-    revlog,
     scmutil,
-    util,
 )
 
 cmdtable = {}
 command = registrar.command(cmdtable)
 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
 # be specifying the version(s) of Mercurial they are tested with, or
 # leave the attribute unspecified.
 testedwith = 'ships-with-hg-core'
 
 @command('censor',
     [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
      ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
     _('-r REV [-t TEXT] [FILE]'))
 def censor(ui, repo, path, rev='', tombstone='', **opts):
     with repo.wlock(), repo.lock():
         return _docensor(ui, repo, path, rev, tombstone, **opts)
 
 def _docensor(ui, repo, path, rev='', tombstone='', **opts):
     if not path:
         raise error.Abort(_('must specify file path to censor'))
     if not rev:
         raise error.Abort(_('must specify revision to censor'))
 
     wctx = repo[None]
 
     m = scmutil.match(wctx, (path,))
     if m.anypats() or len(m.files()) != 1:
         raise error.Abort(_('can only specify an explicit filename'))
     path = m.files()[0]
     flog = repo.file(path)
     if not len(flog):
         raise error.Abort(_('cannot censor file with no history'))
 
     rev = scmutil.revsingle(repo, rev, rev).rev()
     try:
         ctx = repo[rev]
     except KeyError:
         raise error.Abort(_('invalid revision identifier %s') % rev)
 
     try:
         fctx = ctx.filectx(path)
     except error.LookupError:
         raise error.Abort(_('file does not exist at revision %s') % rev)
 
     fnode = fctx.filenode()
     heads = []
     for headnode in repo.heads():
         hc = repo[headnode]
         if path in hc and hc.filenode(path) == fnode:
             heads.append(hc)
     if heads:
         headlist = ', '.join([short(c.node()) for c in heads])
         raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
                           hint=_('clean/delete and commit first'))
 
     wp = wctx.parents()
     if ctx.node() in [p.node() for p in wp]:
         raise error.Abort(_('cannot censor working directory'),
                           hint=_('clean/delete/update first'))
 
-    flogv = flog.version & 0xFFFF
-    if flogv != revlog.REVLOGV1:
-        raise error.Abort(
-            _('censor does not support revlog version %d') % (flogv,))
-
-    tombstone = revlog.packmeta({"censored": tombstone}, "")
-
-    crev = fctx.filerev()
-
-    if len(tombstone) > flog.rawsize(crev):
-        raise error.Abort(_(
-            'censor tombstone must be no longer than censored data'))
-
-    # Using two files instead of one makes it easy to rewrite entry-by-entry
-    idxread = repo.svfs(flog.indexfile, 'r')
-    idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
-    if flog.version & revlog.FLAG_INLINE_DATA:
-        dataread, datawrite = idxread, idxwrite
-    else:
-        dataread = repo.svfs(flog.datafile, 'r')
-        datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
-
-    # Copy all revlog data up to the entry to be censored.
-    rio = revlog.revlogio()
-    offset = flog.start(crev)
-
-    for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
-        idxwrite.write(chunk)
-    for chunk in util.filechunkiter(dataread, limit=offset):
-        datawrite.write(chunk)
-
-    def rewriteindex(r, newoffs, newdata=None):
-        """Rewrite the index entry with a new data offset and optional new data.
-
-        The newdata argument, if given, is a tuple of three positive integers:
-        (new compressed, new uncompressed, added flag bits).
-        """
-        offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
-        flags = revlog.gettype(offlags)
-        if newdata:
-            comp, uncomp, nflags = newdata
-            flags |= nflags
-        offlags = revlog.offset_type(newoffs, flags)
-        e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
-        idxwrite.write(rio.packentry(e, None, flog.version, r))
-        idxread.seek(rio.size, 1)
-
-    def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
-        """Write the given full text to the filelog with the given data offset.
-
-        Returns:
-            The integer number of data bytes written, for tracking data offsets.
-        """
-        flag, compdata = flog.compress(data)
-        newcomp = len(flag) + len(compdata)
-        rewriteindex(r, offs, (newcomp, len(data), nflags))
-        datawrite.write(flag)
-        datawrite.write(compdata)
-        dataread.seek(flog.length(r), 1)
-        return newcomp
-
-    # Rewrite censored revlog entry with (padded) tombstone data.
-    pad = ' ' * (flog.rawsize(crev) - len(tombstone))
-    offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
-
-    # Rewrite all following filelog revisions fixing up offsets and deltas.
-    for srev in pycompat.xrange(crev + 1, len(flog)):
-        if crev in flog.parentrevs(srev):
-            # Immediate children of censored node must be re-added as fulltext.
-            try:
-                revdata = flog.revision(srev)
-            except error.CensoredNodeError as e:
-                revdata = e.tombstone
-            dlen = rewrite(srev, offset, revdata)
-        else:
-            # Copy any other revision data verbatim after fixing up the offset.
-            rewriteindex(srev, offset)
-            dlen = flog.length(srev)
-            for chunk in util.filechunkiter(dataread, limit=dlen):
-                datawrite.write(chunk)
-        offset += dlen
-
-    idxread.close()
-    idxwrite.close()
-    if dataread is not idxread:
-        dataread.close()
-    datawrite.close()
+    with repo.transaction(b'censor') as tr:
+        flog.censorrevision(tr, fnode, tombstone=tombstone)
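
After this commit, the extension side of censoring is reduced to the two added
lines above: open a transaction and hand the file node to the storage layer,
which now owns the index/data rewriting formerly inlined here. A minimal
sketch of the resulting call path (the wrapper function is hypothetical;
``repo.file()``, ``repo.transaction()`` and ``censorrevision()`` are the APIs
shown in this diff)::

    def censorfilerevision(repo, path, fnode, tombstone=b''):
        # Look up the filelog (per-file storage) for the censored path.
        flog = repo.file(path)
        # All revlog surgery now happens behind censorrevision(), inside
        # a normal repository transaction.
        with repo.transaction(b'censor') as tr:
            flog.censorrevision(tr, fnode, tombstone=tombstone)
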
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -1,278 +1,281 @@
 # filelog.py - file history class for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
 from . import (
     error,
     repository,
     revlog,
 )
 from .utils import (
     interfaceutil,
 )
 
 @interfaceutil.implementer(repository.ifilestorage)
 class filelog(object):
     def __init__(self, opener, path):
         self._revlog = revlog.revlog(opener,
                                      '/'.join(('data', path + '.i')),
                                      censorable=True)
         # full name of the user visible file, relative to the repository root
         self.filename = path
         self.index = self._revlog.index
         self.version = self._revlog.version
         self._generaldelta = self._revlog._generaldelta
 
     def __len__(self):
         return len(self._revlog)
 
     def __iter__(self):
         return self._revlog.__iter__()
 
     def revs(self, start=0, stop=None):
         return self._revlog.revs(start=start, stop=stop)
 
     def parents(self, node):
         return self._revlog.parents(node)
 
     def parentrevs(self, rev):
         return self._revlog.parentrevs(rev)
 
     def rev(self, node):
         return self._revlog.rev(node)
 
     def node(self, rev):
         return self._revlog.node(rev)
 
     def lookup(self, node):
         return self._revlog.lookup(node)
 
     def linkrev(self, rev):
         return self._revlog.linkrev(rev)
 
     def flags(self, rev):
         return self._revlog.flags(rev)
 
     def commonancestorsheads(self, node1, node2):
         return self._revlog.commonancestorsheads(node1, node2)
 
     def descendants(self, revs):
         return self._revlog.descendants(revs)
 
     def headrevs(self):
         return self._revlog.headrevs()
 
     def heads(self, start=None, stop=None):
         return self._revlog.heads(start, stop)
 
     def children(self, node):
         return self._revlog.children(node)
 
     def deltaparent(self, rev):
         return self._revlog.deltaparent(rev)
 
     def iscensored(self, rev):
         return self._revlog.iscensored(rev)
 
     def rawsize(self, rev):
         return self._revlog.rawsize(rev)
 
     def checkhash(self, text, node, p1=None, p2=None, rev=None):
         return self._revlog.checkhash(text, node, p1=p1, p2=p2, rev=rev)
 
     def revision(self, node, _df=None, raw=False):
         return self._revlog.revision(node, _df=_df, raw=raw)
 
     def revdiff(self, rev1, rev2):
         return self._revlog.revdiff(rev1, rev2)
 
     def emitrevisiondeltas(self, requests):
         return self._revlog.emitrevisiondeltas(requests)
 
     def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
                     node=None, flags=revlog.REVIDX_DEFAULT_FLAGS,
                     cachedelta=None):
         return self._revlog.addrevision(revisiondata, transaction, linkrev,
                                         p1, p2, node=node, flags=flags,
                                         cachedelta=cachedelta)
 
     def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
         return self._revlog.addgroup(deltas, linkmapper, transaction,
                                      addrevisioncb=addrevisioncb)
 
     def getstrippoint(self, minlink):
         return self._revlog.getstrippoint(minlink)
 
     def strip(self, minlink, transaction):
         return self._revlog.strip(minlink, transaction)
 
+    def censorrevision(self, tr, node, tombstone=b''):
+        return self._revlog.censorrevision(node, tombstone=tombstone)
+
     def files(self):
         return self._revlog.files()
 
     def checksize(self):
         return self._revlog.checksize()
 
     def read(self, node):
         t = self.revision(node)
         if not t.startswith('\1\n'):
             return t
         s = t.index('\1\n', 2)
         return t[s + 2:]
 
     def add(self, text, meta, transaction, link, p1=None, p2=None):
         if meta or text.startswith('\1\n'):
             text = revlog.packmeta(meta, text)
         return self.addrevision(text, transaction, link, p1, p2)
 
     def renamed(self, node):
         if self.parents(node)[0] != revlog.nullid:
             return False
         t = self.revision(node)
         m = revlog.parsemeta(t)[0]
         # copy and copyrev occur in pairs. In rare cases due to bugs,
         # one can occur without the other.
         if m and "copy" in m and "copyrev" in m:
             return (m["copy"], revlog.bin(m["copyrev"]))
         return False
 
     def size(self, rev):
         """return the size of a given revision"""
 
         # for revisions with renames, we have to go the slow way
         node = self.node(rev)
         if self.renamed(node):
             return len(self.read(node))
         if self.iscensored(rev):
             return 0
 
         # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
         return self._revlog.size(rev)
 
     def cmp(self, node, text):
         """compare text with a given file revision
 
         returns True if text is different than what is stored.
         """
 
         t = text
         if text.startswith('\1\n'):
             t = '\1\n\1\n' + text
 
         samehashes = not self._revlog.cmp(node, t)
         if samehashes:
             return False
 
         # censored files compare against the empty file
         if self.iscensored(self.rev(node)):
             return text != ''
 
         # renaming a file produces a different hash, even if the data
         # remains unchanged. Check if it's the case (slow):
         if self.renamed(node):
             t2 = self.read(node)
             return t2 != text
 
         return True
 
     @property
     def filename(self):
         return self._revlog.filename
 
     @filename.setter
     def filename(self, value):
         self._revlog.filename = value
 
     # TODO these aren't part of the interface and aren't internal methods.
     # Callers should be fixed to not use them.
     @property
     def indexfile(self):
         return self._revlog.indexfile
 
     @indexfile.setter
     def indexfile(self, value):
         self._revlog.indexfile = value
 
     @property
     def datafile(self):
         return self._revlog.datafile
 
     @property
     def opener(self):
         return self._revlog.opener
 
     def clone(self, tr, destrevlog, **kwargs):
         if not isinstance(destrevlog, filelog):
             raise error.ProgrammingError('expected filelog to clone()')
 
         return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
 
     def start(self, rev):
         return self._revlog.start(rev)
 
     def end(self, rev):
         return self._revlog.end(rev)
 
     def length(self, rev):
         return self._revlog.length(rev)
 
     def compress(self, data):
         return self._revlog.compress(data)
 
     def _addrevision(self, *args, **kwargs):
         return self._revlog._addrevision(*args, **kwargs)
 
 class narrowfilelog(filelog):
     """Filelog variation to be used with narrow stores."""
 
     def __init__(self, opener, path, narrowmatch):
         super(narrowfilelog, self).__init__(opener, path)
         self._narrowmatch = narrowmatch
 
     def renamed(self, node):
         res = super(narrowfilelog, self).renamed(node)
 
         # Renames that come from outside the narrowspec are problematic
         # because we may lack the base text for the rename. This can result
         # in code attempting to walk the ancestry or compute a diff
         # encountering a missing revision. We address this by silently
         # removing rename metadata if the source file is outside the
         # narrow spec.
         #
         # A better solution would be to see if the base revision is available,
         # rather than assuming it isn't.
         #
         # An even better solution would be to teach all consumers of rename
         # metadata that the base revision may not be available.
         #
         # TODO consider better ways of doing this.
         if res and not self._narrowmatch(res[0]):
             return None
 
         return res
 
     def size(self, rev):
         # Because we have a custom renamed() that may lie, we need to call
         # the base renamed() to report accurate results.
         node = self.node(rev)
         if super(narrowfilelog, self).renamed(node):
             return len(self.read(node))
         else:
             return super(narrowfilelog, self).size(rev)
 
     def cmp(self, node, text):
         different = super(narrowfilelog, self).cmp(node, text)
 
         # Because renamed() may lie, we may get false positives for
         # different content. Check for this by comparing against the original
         # renamed() implementation.
         if different:
             if super(narrowfilelog, self).renamed(node):
                 t2 = self.read(node)
                 return t2 != text
 
         return different
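
``read()``, ``add()`` and ``cmp()`` above all deal with the filelog metadata
envelope: a fulltext beginning with ``\1\n`` carries ``key: value`` lines
terminated by a second ``\1\n`` (which is also why ``cmp()`` prepends an empty
envelope when the text itself starts with ``\1\n``). A self-contained sketch
of that framing, using hypothetical helper names rather than Mercurial's own
``revlog.packmeta``/``parsemeta``::

    def packmeta(meta, text):
        # Serialize 'key: value' metadata lines in front of the fulltext.
        lines = b''.join(b'%s: %s\n' % (k, meta[k]) for k in sorted(meta))
        return b'\x01\n' + lines + b'\x01\n' + text

    def unpackmeta(t):
        # Strip the envelope, mirroring what filelog.read() does.
        if not t.startswith(b'\x01\n'):
            return {}, t
        end = t.index(b'\x01\n', 2)
        meta = dict(l.split(b': ', 1) for l in t[2:end].splitlines())
        return meta, t[end + 2:]

    packed = packmeta({b'copy': b'a.txt', b'copyrev': b'ab' * 20}, b'body')
    assert unpackmeta(packed) == ({b'copy': b'a.txt', b'copyrev': b'ab' * 20},
                                  b'body')
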
diff --git a/mercurial/repository.py b/mercurial/repository.py
--- a/mercurial/repository.py
+++ b/mercurial/repository.py
@@ -1,1585 +1,1602 @@
 # repository.py - Interfaces and base classes for repositories and peers.
 #
 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
 from .i18n import _
 from . import (
     error,
 )
 from .utils import (
     interfaceutil,
 )
 
 # When narrowing is finalized and no longer subject to format changes,
 # we should move this to just "narrow" or similar.
 NARROW_REQUIREMENT = 'narrowhg-experimental'
 
 class ipeerconnection(interfaceutil.Interface):
     """Represents a "connection" to a repository.
 
     This is the base interface for representing a connection to a repository.
     It holds basic properties and methods applicable to all peer types.
 
     This is not a complete interface definition and should not be used
     outside of this module.
     """
     ui = interfaceutil.Attribute("""ui.ui instance""")
 
     def url():
         """Returns a URL string representing this peer.
 
         Currently, implementations expose the raw URL used to construct the
         instance. It may contain credentials as part of the URL. The
         expectations of the value aren't well-defined and this could lead to
         data leakage.
 
         TODO audit/clean consumers and more clearly define the contents of this
         value.
         """
 
     def local():
         """Returns a local repository instance.
 
         If the peer represents a local repository, returns an object that
         can be used to interface with it. Otherwise returns ``None``.
         """
 
     def peer():
         """Returns an object conforming to this interface.
 
         Most implementations will ``return self``.
         """
 
     def canpush():
         """Returns a boolean indicating if this peer can be pushed to."""
 
     def close():
         """Close the connection to this peer.
 
         This is called when the peer will no longer be used. Resources
         associated with the peer should be cleaned up.
         """
 
 class ipeercapabilities(interfaceutil.Interface):
     """Peer sub-interface related to capabilities."""
 
     def capable(name):
         """Determine support for a named capability.
 
         Returns ``False`` if capability not supported.
 
         Returns ``True`` if boolean capability is supported. Returns a string
         if capability support is non-boolean.
 
         Capability strings may or may not map to wire protocol capabilities.
         """
 
     def requirecap(name, purpose):
         """Require a capability to be present.
 
         Raises a ``CapabilityError`` if the capability isn't present.
         """
 
 class ipeercommands(interfaceutil.Interface):
     """Client-side interface for communicating over the wire protocol.
 
     This interface is used as a gateway to the Mercurial wire protocol.
     Methods commonly call wire protocol commands of the same name.
     """
 
     def branchmap():
         """Obtain heads in named branches.
 
         Returns a dict mapping branch name to an iterable of nodes that are
         heads on that branch.
         """
 
     def capabilities():
         """Obtain capabilities of the peer.
 
         Returns a set of string capabilities.
         """
 
     def clonebundles():
         """Obtains the clone bundles manifest for the repo.
 
         Returns the manifest as unparsed bytes.
         """
 
     def debugwireargs(one, two, three=None, four=None, five=None):
         """Used to facilitate debugging of arguments passed over the wire."""
 
     def getbundle(source, **kwargs):
         """Obtain remote repository data as a bundle.
 
         This command is how the bulk of repository data is transferred from
         the peer to the local repository.
 
         Returns a generator of bundle data.
         """
 
     def heads():
         """Determine all known head revisions in the peer.
 
         Returns an iterable of binary nodes.
         """
 
     def known(nodes):
         """Determine whether multiple nodes are known.
 
         Accepts an iterable of nodes whose presence to check for.
 
         Returns an iterable of booleans indicating whether the corresponding
         node at that index is known to the peer.
         """
 
     def listkeys(namespace):
         """Obtain all keys in a pushkey namespace.
 
         Returns an iterable of key names.
         """
 
     def lookup(key):
         """Resolve a value to a known revision.
 
         Returns a binary node of the resolved revision on success.
         """
 
     def pushkey(namespace, key, old, new):
         """Set a value using the ``pushkey`` protocol.
 
         Arguments correspond to the pushkey namespace and key to operate on and
         the old and new values for that key.
 
         Returns a string with the peer result. The value inside varies by the
         namespace.
         """
 
     def stream_out():
         """Obtain streaming clone data.
 
         Successful result should be a generator of data chunks.
         """
 
     def unbundle(bundle, heads, url):
         """Transfer repository data to the peer.
 
         This is how the bulk of data during a push is transferred.
 
         Returns the integer number of heads added to the peer.
         """
 
 class ipeerlegacycommands(interfaceutil.Interface):
     """Interface for implementing support for legacy wire protocol commands.
 
     Wire protocol commands transition to legacy status when they are no longer
     used by modern clients. To facilitate identifying which commands are
     legacy, the interfaces are split.
     """
 
     def between(pairs):
         """Obtain nodes between pairs of nodes.
 
         ``pairs`` is an iterable of node pairs.
 
         Returns an iterable of iterables of nodes corresponding to each
         requested pair.
         """
 
     def branches(nodes):
         """Obtain ancestor changesets of specific nodes back to a branch point.
 
         For each requested node, the peer finds the first ancestor node that is
         a DAG root or is a merge.
 
         Returns an iterable of iterables with the resolved values for each node.
         """
 
     def changegroup(nodes, source):
         """Obtain a changegroup with data for descendants of specified nodes."""
 
     def changegroupsubset(bases, heads, source):
         pass
 
 class ipeercommandexecutor(interfaceutil.Interface):
     """Represents a mechanism to execute remote commands.
 
     This is the primary interface for requesting that wire protocol commands
     be executed. Instances of this interface are active in a context manager
     and have a well-defined lifetime. When the context manager exits, all
     outstanding requests are waited on.
     """
 
     def callcommand(name, args):
         """Request that a named command be executed.
 
         Receives the command name and a dictionary of command arguments.
 
         Returns a ``concurrent.futures.Future`` that will resolve to the
         result of that command request. That exact value is left up to
         the implementation and possibly varies by command.
 
         Not all commands can coexist with other commands in an executor
         instance: it depends on the underlying wire protocol transport being
         used and the command itself.
 
         Implementations MAY call ``sendcommands()`` automatically if the
         requested command can not coexist with other commands in this executor.
 
         Implementations MAY call ``sendcommands()`` automatically when the
         future's ``result()`` is called. So, consumers using multiple
         commands with an executor MUST ensure that ``result()`` is not called
         until all command requests have been issued.
         """
 
     def sendcommands():
         """Trigger submission of queued command requests.
 
         Not all transports submit commands as soon as they are requested to
         run. When called, this method forces queued command requests to be
         issued. It will no-op if all commands have already been sent.
 
         When called, no more new commands may be issued with this executor.
         """
 
     def close():
         """Signal that this command request is finished.
 
         When called, no more new commands may be issued. All outstanding
         commands that have previously been issued are waited on before
         returning. This not only includes waiting for the futures to resolve,
         but also waiting for all response data to arrive. In other words,
         calling this waits for all on-wire state for issued command requests
         to finish.
 
         When used as a context manager, this method is called when exiting the
         context manager.
 
         This method may call ``sendcommands()`` if there are buffered commands.
         """
 
 class ipeerrequests(interfaceutil.Interface):
     """Interface for executing commands on a peer."""
 
     def commandexecutor():
         """A context manager that resolves to an ipeercommandexecutor.
 
         The object this resolves to can be used to issue command requests
         to the peer.
 
         Callers should call its ``callcommand`` method to issue command
         requests.
 
         A new executor should be obtained for each distinct set of commands
         (possibly just a single command) that the consumer wants to execute
         as part of a single operation or round trip. This is because some
         peers are half-duplex and/or don't support persistent connections.
         e.g. in the case of HTTP peers, commands sent to an executor represent
         a single HTTP request. While some peers may support multiple command
         sends over the wire per executor, consumers need to code to the least
         capable peer. So it should be assumed that command executors buffer
         called commands until they are told to send them and that each
         command executor could result in a new connection or wire-level request
         being issued.
         """
 
 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
     """Unified interface for peer repositories.
 
     All peer instances must conform to this interface.
     """
 
 @interfaceutil.implementer(ipeerbase)
 class peer(object):
     """Base class for peer repositories."""
 
     def capable(self, name):
         caps = self.capabilities()
         if name in caps:
             return True
 
         name = '%s=' % name
         for cap in caps:
             if cap.startswith(name):
                 return cap[len(name):]
 
         return False
 
     def requirecap(self, name, purpose):
         if self.capable(name):
             return
 
         raise error.CapabilityError(
             _('cannot %s; remote repository does not support the %r '
               'capability') % (purpose, name))
 
 class irevisiondelta(interfaceutil.Interface):
     """Represents a delta between one revision and another.
 
     Instances convey enough information to allow a revision to be exchanged
     with another repository.
 
     Instances represent the fulltext revision data or a delta against
     another revision. Therefore the ``revision`` and ``delta`` attributes
     are mutually exclusive.
 
     Typically used for changegroup generation.
     """
 
     node = interfaceutil.Attribute(
         """20 byte node of this revision.""")
 
     p1node = interfaceutil.Attribute(
         """20 byte node of 1st parent of this revision.""")
 
     p2node = interfaceutil.Attribute(
         """20 byte node of 2nd parent of this revision.""")
 
     linknode = interfaceutil.Attribute(
         """20 byte node of the changelog revision this node is linked to.""")
 
     flags = interfaceutil.Attribute(
         """2 bytes of integer flags that apply to this revision.""")
 
     basenode = interfaceutil.Attribute(
         """20 byte node of the revision this data is a delta against.
 
         ``nullid`` indicates that the revision is a full revision and not
         a delta.
         """)
 
     baserevisionsize = interfaceutil.Attribute(
         """Size of base revision this delta is against.
 
         May be ``None`` if ``basenode`` is ``nullid``.
         """)
 
     revision = interfaceutil.Attribute(
         """Raw fulltext of revision data for this node.""")
 
     delta = interfaceutil.Attribute(
         """Delta between ``basenode`` and ``node``.
 
         Stored in the bdiff delta format.
         """)
 
 class irevisiondeltarequest(interfaceutil.Interface):
     """Represents a request to generate an ``irevisiondelta``."""
 
     node = interfaceutil.Attribute(
         """20 byte node of revision being requested.""")
 
     p1node = interfaceutil.Attribute(
         """20 byte node of 1st parent of revision.""")
 
     p2node = interfaceutil.Attribute(
         """20 byte node of 2nd parent of revision.""")
 
     linknode = interfaceutil.Attribute(
         """20 byte node to store in ``linknode`` attribute.""")
 
     basenode = interfaceutil.Attribute(
         """Base revision that delta should be generated against.
 
         If ``nullid``, the derived ``irevisiondelta`` should have its
         ``revision`` field populated and no delta should be generated.
 
         If ``None``, the delta may be generated against any revision that
         is an ancestor of this revision. Or a full revision may be used.
 
         If any other value, the delta should be produced against that
         revision.
         """)
 
     ellipsis = interfaceutil.Attribute(
         """Boolean on whether the ellipsis flag should be set.""")
 
 class ifilerevisionssequence(interfaceutil.Interface):
     """Contains index data for all revisions of a file.
 
     Types implementing this behave like lists of tuples. The index
     in the list corresponds to the revision number. The values contain
     index metadata.
 
     The *null* revision (revision number -1) is always the last item
     in the index.
     """
 
     def __len__():
         """The total number of revisions."""
 
     def __getitem__(rev):
         """Returns the object having a specific revision number.
 
         Returns an 8-tuple with the following fields:
 
         offset+flags
            Contains the offset and flags for the revision. 64-bit unsigned
            integer where first 6 bytes are the offset and the next 2 bytes
            are flags. The offset can be 0 if it is not used by the store.
         compressed size
            Size of the revision data in the store. It can be 0 if it isn't
            needed by the store.
         uncompressed size
            Fulltext size. It can be 0 if it isn't needed by the store.
         base revision
            Revision number of revision the delta for storage is encoded
            against. -1 indicates not encoded against a base revision.
         link revision
            Revision number of changelog revision this entry is related to.
         p1 revision
            Revision number of 1st parent. -1 if no 1st parent.
         p2 revision
            Revision number of 2nd parent. -1 if no 2nd parent.
439 node
439 node
440 Binary node value for this revision number.
440 Binary node value for this revision number.
441
441
442 Negative values should index off the end of the sequence. ``-1``
442 Negative values should index off the end of the sequence. ``-1``
443 should return the null revision. ``-2`` should return the most
443 should return the null revision. ``-2`` should return the most
444 recent revision.
444 recent revision.
445 """
445 """
446
446
447 def __contains__(rev):
447 def __contains__(rev):
448 """Whether a revision number exists."""
448 """Whether a revision number exists."""
449
449
450 def insert(self, i, entry):
450 def insert(self, i, entry):
451 """Add an item to the index at specific revision."""
451 """Add an item to the index at specific revision."""
452
452
453 class ifileindex(interfaceutil.Interface):
453 class ifileindex(interfaceutil.Interface):
454 """Storage interface for index data of a single file.
454 """Storage interface for index data of a single file.
455
455
456 File storage data is divided into index metadata and data storage.
456 File storage data is divided into index metadata and data storage.
457 This interface defines the index portion of the interface.
457 This interface defines the index portion of the interface.
458
458
459 The index logically consists of:
459 The index logically consists of:
460
460
461 * A mapping between revision numbers and nodes.
461 * A mapping between revision numbers and nodes.
462 * DAG data (storing and querying the relationship between nodes).
462 * DAG data (storing and querying the relationship between nodes).
463 * Metadata to facilitate storage.
463 * Metadata to facilitate storage.
464 """
464 """
465 index = interfaceutil.Attribute(
465 index = interfaceutil.Attribute(
466 """An ``ifilerevisionssequence`` instance.""")
466 """An ``ifilerevisionssequence`` instance.""")
467
467
468 def __len__():
468 def __len__():
469 """Obtain the number of revisions stored for this file."""
469 """Obtain the number of revisions stored for this file."""
470
470
471 def __iter__():
471 def __iter__():
472 """Iterate over revision numbers for this file."""
472 """Iterate over revision numbers for this file."""
473
473
474 def revs(start=0, stop=None):
474 def revs(start=0, stop=None):
475 """Iterate over revision numbers for this file, with control."""
475 """Iterate over revision numbers for this file, with control."""
476
476
477 def parents(node):
477 def parents(node):
478 """Returns a 2-tuple of parent nodes for a revision.
478 """Returns a 2-tuple of parent nodes for a revision.
479
479
480 Values will be ``nullid`` if the parent is empty.
480 Values will be ``nullid`` if the parent is empty.
481 """
481 """
482
482
483 def parentrevs(rev):
483 def parentrevs(rev):
484 """Like parents() but operates on revision numbers."""
484 """Like parents() but operates on revision numbers."""
485
485
486 def rev(node):
486 def rev(node):
487 """Obtain the revision number given a node.
487 """Obtain the revision number given a node.
488
488
489 Raises ``error.LookupError`` if the node is not known.
489 Raises ``error.LookupError`` if the node is not known.
490 """
490 """
491
491
492 def node(rev):
492 def node(rev):
493 """Obtain the node value given a revision number.
493 """Obtain the node value given a revision number.
494
494
495 Raises ``IndexError`` if the revision is not known.
495 Raises ``IndexError`` if the revision is not known.
496 """
496 """
497
497
498 def lookup(node):
498 def lookup(node):
499 """Attempt to resolve a value to a node.
499 """Attempt to resolve a value to a node.
500
500
501 Value can be a binary node, hex node, revision number, or a string
501 Value can be a binary node, hex node, revision number, or a string
502 that can be converted to an integer.
502 that can be converted to an integer.
503
503
504 Raises ``error.LookupError`` if a node could not be resolved.
504 Raises ``error.LookupError`` if a node could not be resolved.
505 """
505 """
506
506
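A hedged sketch of the node/revision round trip these methods provide;
``fl`` stands in for an implementation and ``somenode`` for a known
binary node::

    from mercurial.node import hex

    r = fl.rev(somenode)             # binary node -> revision number
    assert fl.node(r) == somenode    # revision number -> binary node
    # lookup() is more permissive and also resolves hex nodes:
    assert fl.lookup(hex(somenode)) == somenode
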
507 def linkrev(rev):
507 def linkrev(rev):
508 """Obtain the changeset revision number a revision is linked to."""
508 """Obtain the changeset revision number a revision is linked to."""
509
509
510 def flags(rev):
510 def flags(rev):
511 """Obtain flags used to affect storage of a revision."""
511 """Obtain flags used to affect storage of a revision."""
512
512
513 def iscensored(rev):
513 def iscensored(rev):
514 """Return whether a revision's content has been censored."""
514 """Return whether a revision's content has been censored."""
515
515
516 def commonancestorsheads(node1, node2):
516 def commonancestorsheads(node1, node2):
517 """Obtain an iterable of nodes containing heads of common ancestors.
517 """Obtain an iterable of nodes containing heads of common ancestors.
518
518
519 See ``ancestor.commonancestorsheads()``.
519 See ``ancestor.commonancestorsheads()``.
520 """
520 """
521
521
522 def descendants(revs):
522 def descendants(revs):
523 """Obtain descendant revision numbers for a set of revision numbers.
523 """Obtain descendant revision numbers for a set of revision numbers.
524
524
525 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
525 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
526 """
526 """
527
527
528 def headrevs():
528 def headrevs():
529 """Obtain a list of revision numbers that are DAG heads.
529 """Obtain a list of revision numbers that are DAG heads.
530
530
531 The list is sorted oldest to newest.
531 The list is sorted oldest to newest.
532
532
533 TODO determine if sorting is required.
533 TODO determine if sorting is required.
534 """
534 """
535
535
536 def heads(start=None, stop=None):
536 def heads(start=None, stop=None):
537 """Obtain a list of nodes that are DAG heads, with control.
537 """Obtain a list of nodes that are DAG heads, with control.
538
538
539 The set of revisions examined can be limited by specifying
539 The set of revisions examined can be limited by specifying
540 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
540 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
541 iterable of nodes. DAG traversal starts at earlier revision
541 iterable of nodes. DAG traversal starts at earlier revision
542 ``start`` and iterates forward until any node in ``stop`` is
542 ``start`` and iterates forward until any node in ``stop`` is
543 encountered.
543 encountered.
544 """
544 """
545
545
546 def children(node):
546 def children(node):
547 """Obtain nodes that are children of a node.
547 """Obtain nodes that are children of a node.
548
548
549 Returns a list of nodes.
549 Returns a list of nodes.
550 """
550 """
551
551
552 def deltaparent(rev):
552 def deltaparent(rev):
553 """"Return the revision that is a suitable parent to delta against."""
553 """"Return the revision that is a suitable parent to delta against."""
554
554
555 class ifiledata(interfaceutil.Interface):
555 class ifiledata(interfaceutil.Interface):
556 """Storage interface for data storage of a specific file.
556 """Storage interface for data storage of a specific file.
557
557
558 This complements ``ifileindex`` and provides an interface for accessing
558 This complements ``ifileindex`` and provides an interface for accessing
559 data for a tracked file.
559 data for a tracked file.
560 """
560 """
561 def rawsize(rev):
561 def rawsize(rev):
562 """The size of the fulltext data for a revision as stored."""
562 """The size of the fulltext data for a revision as stored."""
563
563
564 def size(rev):
564 def size(rev):
565 """Obtain the fulltext size of file data.
565 """Obtain the fulltext size of file data.
566
566
567 Any metadata is excluded from size measurements. Use ``rawsize()`` if
567 Any metadata is excluded from size measurements. Use ``rawsize()`` if
568 metadata size is important.
568 metadata size is important.
569 """
569 """
570
570
571 def checkhash(fulltext, node, p1=None, p2=None, rev=None):
571 def checkhash(fulltext, node, p1=None, p2=None, rev=None):
572 """Validate the stored hash of a given fulltext and node.
572 """Validate the stored hash of a given fulltext and node.
573
573
574 Raises ``error.StorageError`` if hash validation fails.
574 Raises ``error.StorageError`` if hash validation fails.
575 """
575 """
576
576
577 def revision(node, raw=False):
577 def revision(node, raw=False):
578 """"Obtain fulltext data for a node.
578 """"Obtain fulltext data for a node.
579
579
580 By default, any storage transformations are applied before the data
580 By default, any storage transformations are applied before the data
581 is returned. If ``raw`` is True, non-raw storage transformations
581 is returned. If ``raw`` is True, non-raw storage transformations
582 are not applied.
582 are not applied.
583
583
584 The fulltext data may contain a header containing metadata. Most
584 The fulltext data may contain a header containing metadata. Most
585 consumers should use ``read()`` to obtain the actual file data.
585 consumers should use ``read()`` to obtain the actual file data.
586 """
586 """
587
587
588 def read(node):
588 def read(node):
589 """Resolve file fulltext data.
589 """Resolve file fulltext data.
590
590
591 This is similar to ``revision()`` except any metadata in the data
591 This is similar to ``revision()`` except any metadata in the data
592 headers is stripped.
592 headers is stripped.
593 """
593 """
594
594
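A sketch contrasting the three accessors; ``fl`` stands in for an
implementation and ``node`` for a known revision::

    stored = fl.revision(node, raw=True)  # as stored; transforms skipped
    fulltext = fl.revision(node)          # transforms applied; header kept
    filedata = fl.read(node)              # header stripped; file content
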
595 def renamed(node):
595 def renamed(node):
596 """Obtain copy metadata for a node.
596 """Obtain copy metadata for a node.
597
597
598 Returns ``False`` if no copy metadata is stored or a 2-tuple of
598 Returns ``False`` if no copy metadata is stored or a 2-tuple of
599 (path, node) from which this revision was copied.
599 (path, node) from which this revision was copied.
600 """
600 """
601
601
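For example, a hypothetical consumer of the copy metadata::

    copied = fl.renamed(node)
    if copied:
        # this revision was created by copying srcpath at srcnode
        srcpath, srcnode = copied
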
602 def cmp(node, fulltext):
602 def cmp(node, fulltext):
603 """Compare fulltext to another revision.
603 """Compare fulltext to another revision.
604
604
605 Returns True if the fulltext is different from what is stored.
605 Returns True if the fulltext is different from what is stored.
606
606
607 This takes copy metadata into account.
607 This takes copy metadata into account.
608
608
609 TODO better document the copy metadata and censoring logic.
609 TODO better document the copy metadata and censoring logic.
610 """
610 """
611
611
612 def revdiff(rev1, rev2):
612 def revdiff(rev1, rev2):
613 """Obtain a delta between two revision numbers.
613 """Obtain a delta between two revision numbers.
614
614
615 Operates on raw data in the store (``revision(node, raw=True)``).
615 Operates on raw data in the store (``revision(node, raw=True)``).
616
616
617 The returned data is the result of ``bdiff.bdiff`` on the raw
617 The returned data is the result of ``bdiff.bdiff`` on the raw
618 revision data.
618 revision data.
619 """
619 """
620
620
621 def emitrevisiondeltas(requests):
621 def emitrevisiondeltas(requests):
622 """Produce ``irevisiondelta`` from ``irevisiondeltarequest``s.
622 """Produce ``irevisiondelta`` from ``irevisiondeltarequest``s.
623
623
624 Given an iterable of objects conforming to the ``irevisiondeltarequest``
624 Given an iterable of objects conforming to the ``irevisiondeltarequest``
625 interface, emits objects conforming to the ``irevisiondelta``
625 interface, emits objects conforming to the ``irevisiondelta``
626 interface.
626 interface.
627
627
628 This method is a generator.
628 This method is a generator.
629
629
630 ``irevisiondelta`` should be emitted in the same order of
630 ``irevisiondelta`` should be emitted in the same order of
631 ``irevisiondeltarequest`` that was passed in.
631 ``irevisiondeltarequest`` that was passed in.
632
632
633 The emitted objects MUST conform to the results of
633 The emitted objects MUST conform to the results of
634 ``irevisiondeltarequest``. Namely, they must respect any requests
634 ``irevisiondeltarequest``. Namely, they must respect any requests
635 for building a delta from a specific ``basenode`` if defined.
635 for building a delta from a specific ``basenode`` if defined.
636
636
637 When sending deltas, implementations must take into account whether
637 When sending deltas, implementations must take into account whether
638 the client has the base delta before encoding a delta against that
638 the client has the base delta before encoding a delta against that
639 revision. A revision encountered previously in ``requests`` is
639 revision. A revision encountered previously in ``requests`` is
640 always a suitable base revision. An example of a bad delta is a delta
640 always a suitable base revision. An example of a bad delta is a delta
641 against a non-ancestor revision. Another example of a bad delta is a
641 against a non-ancestor revision. Another example of a bad delta is a
642 delta against a censored revision.
642 delta against a censored revision.
643 """
643 """
644
644
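A minimal consumer sketch that assumes only what the docstring
promises; ``store`` and ``process`` are placeholders, and ``requests``
is assumed to be a list so it can be iterated twice::

    deltas = store.emitrevisiondeltas(requests)  # a generator
    for request, delta in zip(requests, deltas):
        # deltas are emitted in request order and honor any basenode
        # the request specified
        process(request, delta)
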
645 class ifilemutation(interfaceutil.Interface):
645 class ifilemutation(interfaceutil.Interface):
646 """Storage interface for mutation events of a tracked file."""
646 """Storage interface for mutation events of a tracked file."""
647
647
648 def add(filedata, meta, transaction, linkrev, p1, p2):
648 def add(filedata, meta, transaction, linkrev, p1, p2):
649 """Add a new revision to the store.
649 """Add a new revision to the store.
650
650
651 Takes file data, dictionary of metadata, a transaction, linkrev,
651 Takes file data, dictionary of metadata, a transaction, linkrev,
652 and parent nodes.
652 and parent nodes.
653
653
654 Returns the node that was added.
654 Returns the node that was added.
655
655
656 May no-op if a revision matching the supplied data is already stored.
656 May no-op if a revision matching the supplied data is already stored.
657 """
657 """
658
658
659 def addrevision(revisiondata, transaction, linkrev, p1, p2, node=None,
659 def addrevision(revisiondata, transaction, linkrev, p1, p2, node=None,
660 flags=0, cachedelta=None):
660 flags=0, cachedelta=None):
661 """Add a new revision to the store.
661 """Add a new revision to the store.
662
662
663 This is similar to ``add()`` except it operates at a lower level.
663 This is similar to ``add()`` except it operates at a lower level.
664
664
665 The data passed in already contains a metadata header, if any.
665 The data passed in already contains a metadata header, if any.
666
666
667 ``node`` and ``flags`` can be used to define the expected node and
667 ``node`` and ``flags`` can be used to define the expected node and
668 the flags to use with storage.
668 the flags to use with storage.
669
669
670 ``add()`` is usually called when adding files from e.g. the working
670 ``add()`` is usually called when adding files from e.g. the working
671 directory. ``addrevision()`` is often called by ``add()`` and for
671 directory. ``addrevision()`` is often called by ``add()`` and for
672 scenarios where revision data has already been computed, such as when
672 scenarios where revision data has already been computed, such as when
673 applying raw data from a peer repo.
673 applying raw data from a peer repo.
674 """
674 """
675
675
676 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
676 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
677 """Process a series of deltas for storage.
677 """Process a series of deltas for storage.
678
678
679 ``deltas`` is an iterable of 7-tuples of
679 ``deltas`` is an iterable of 7-tuples of
680 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
680 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
681 to add.
681 to add.
682
682
683 The ``delta`` field contains ``mpatch`` data to apply to a base
683 The ``delta`` field contains ``mpatch`` data to apply to a base
684 revision, identified by ``deltabase``. The base node can be
684 revision, identified by ``deltabase``. The base node can be
685 ``nullid``, in which case the header from the delta can be ignored
685 ``nullid``, in which case the header from the delta can be ignored
686 and the delta used as the fulltext.
686 and the delta used as the fulltext.
687
687
688 ``addrevisioncb`` should be called for each node as it is committed.
688 ``addrevisioncb`` should be called for each node as it is committed.
689
689
690 Returns a list of nodes that were processed. A node will be in the list
690 Returns a list of nodes that were processed. A node will be in the list
691 even if it existed in the store previously.
691 even if it existed in the store previously.
692 """
692 """
693
693
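A hedged sketch of feeding a single revision through ``addgroup()``,
using the ``nullid`` base described above; all names other than
``addgroup`` and ``nullid`` are placeholders::

    from mercurial.node import nullid

    deltas = [
        # (node, p1, p2, linknode, deltabase, delta, flags)
        (newnode, p1node, nullid, linknode, nullid, payload, 0),
    ]
    # linkmapper is assumed to map a linknode to a linkrev
    fl.addgroup(deltas, linkmapper, tr)
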
694 def censorrevision(tr, node, tombstone=b''):
695 """Remove the content of a single revision.
696
697 The specified ``node`` will have its content purged from storage.
698 Future attempts to access the revision data for this node will
699 result in failure.
700
701 A ``tombstone`` message can optionally be stored. This message may be
702 displayed to users when they attempt to access the missing revision
703 data.
704
705 Storage backends may have stored deltas against the previous content
706 in this revision. As part of censoring a revision, these storage
707 backends are expected to rewrite any internally stored deltas such
708 that they no longer reference the deleted content.
709 """
710
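A sketch of invoking this new API from repository level; the path,
node and tombstone are made up, and locking is elided for brevity::

    with repo.transaction(b'censor') as tr:
        fl = repo.file(b'path/to/secret')
        fl.censorrevision(tr, badnode,
                          tombstone=b'removed due to key leak')
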
694 def getstrippoint(minlink):
711 def getstrippoint(minlink):
695 """Find the minimum revision that must be stripped to strip a linkrev.
712 """Find the minimum revision that must be stripped to strip a linkrev.
696
713
697 Returns a 2-tuple containing the minimum revision number and a set
714 Returns a 2-tuple containing the minimum revision number and a set
698 of all revisions numbers that would be broken by this strip.
715 of all revisions numbers that would be broken by this strip.
699
716
700 TODO this is highly revlog centric and should be abstracted into
717 TODO this is highly revlog centric and should be abstracted into
701 a higher-level deletion API. ``repair.strip()`` relies on this.
718 a higher-level deletion API. ``repair.strip()`` relies on this.
702 """
719 """
703
720
704 def strip(minlink, transaction):
721 def strip(minlink, transaction):
705 """Remove storage of items starting at a linkrev.
722 """Remove storage of items starting at a linkrev.
706
723
707 This uses ``getstrippoint()`` to determine the first node to remove.
724 This uses ``getstrippoint()`` to determine the first node to remove.
708 Then it effectively truncates storage for all revisions after that.
725 Then it effectively truncates storage for all revisions after that.
709
726
710 TODO this is highly revlog centric and should be abstracted into a
727 TODO this is highly revlog centric and should be abstracted into a
711 higher-level deletion API.
728 higher-level deletion API.
712 """
729 """
713
730
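A hedged sketch tying the two methods together; ``fl``, ``tr`` and
``minlink`` are placeholders::

    rev, broken = fl.getstrippoint(minlink)
    if rev < len(fl):
        # everything from ``rev`` onward must go; ``broken`` holds the
        # revision numbers this truncation will invalidate
        fl.strip(minlink, tr)
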
714 class ifilestorage(ifileindex, ifiledata, ifilemutation):
731 class ifilestorage(ifileindex, ifiledata, ifilemutation):
715 """Complete storage interface for a single tracked file."""
732 """Complete storage interface for a single tracked file."""
716
733
717 version = interfaceutil.Attribute(
734 version = interfaceutil.Attribute(
718 """Version number of storage.
735 """Version number of storage.
719
736
720 TODO this feels revlog centric and could likely be removed.
737 TODO this feels revlog centric and could likely be removed.
721 """)
738 """)
722
739
723 _generaldelta = interfaceutil.Attribute(
740 _generaldelta = interfaceutil.Attribute(
724 """Whether deltas can be against any parent revision.
741 """Whether deltas can be against any parent revision.
725
742
726 TODO this is used by changegroup code and it could probably be
743 TODO this is used by changegroup code and it could probably be
727 folded into another API.
744 folded into another API.
728 """)
745 """)
729
746
730 def files():
747 def files():
731 """Obtain paths that are backing storage for this file.
748 """Obtain paths that are backing storage for this file.
732
749
733 TODO this is used heavily by verify code and there should probably
750 TODO this is used heavily by verify code and there should probably
734 be a better API for that.
751 be a better API for that.
735 """
752 """
736
753
737 def checksize():
754 def checksize():
738 """Obtain the expected sizes of backing files.
755 """Obtain the expected sizes of backing files.
739
756
740 TODO this is used by verify and it should not be part of the interface.
757 TODO this is used by verify and it should not be part of the interface.
741 """
758 """
742
759
743 class idirs(interfaceutil.Interface):
760 class idirs(interfaceutil.Interface):
744 """Interface representing a collection of directories from paths.
761 """Interface representing a collection of directories from paths.
745
762
746 This interface is essentially a derived data structure representing
763 This interface is essentially a derived data structure representing
747 directories from a collection of paths.
764 directories from a collection of paths.
748 """
765 """
749
766
750 def addpath(path):
767 def addpath(path):
751 """Add a path to the collection.
768 """Add a path to the collection.
752
769
753 All directories in the path will be added to the collection.
770 All directories in the path will be added to the collection.
754 """
771 """
755
772
756 def delpath(path):
773 def delpath(path):
757 """Remove a path from the collection.
774 """Remove a path from the collection.
758
775
759 If the removal was the last path in a particular directory, the
776 If the removal was the last path in a particular directory, the
760 directory is removed from the collection.
777 directory is removed from the collection.
761 """
778 """
762
779
763 def __iter__():
780 def __iter__():
764 """Iterate over the directories in this collection of paths."""
781 """Iterate over the directories in this collection of paths."""
765
782
766 def __contains__(path):
783 def __contains__(path):
767 """Whether a specific directory is in this collection."""
784 """Whether a specific directory is in this collection."""
768
785
769 class imanifestdict(interfaceutil.Interface):
786 class imanifestdict(interfaceutil.Interface):
770 """Interface representing a manifest data structure.
787 """Interface representing a manifest data structure.
771
788
772 A manifest is effectively a dict mapping paths to entries. Each entry
789 A manifest is effectively a dict mapping paths to entries. Each entry
773 consists of a binary node and extra flags affecting that entry.
790 consists of a binary node and extra flags affecting that entry.
774 """
791 """
775
792
776 def __getitem__(path):
793 def __getitem__(path):
777 """Returns the binary node value for a path in the manifest.
794 """Returns the binary node value for a path in the manifest.
778
795
779 Raises ``KeyError`` if the path does not exist in the manifest.
796 Raises ``KeyError`` if the path does not exist in the manifest.
780
797
781 Equivalent to ``self.find(path)[0]``.
798 Equivalent to ``self.find(path)[0]``.
782 """
799 """
783
800
784 def find(path):
801 def find(path):
785 """Returns the entry for a path in the manifest.
802 """Returns the entry for a path in the manifest.
786
803
787 Returns a 2-tuple of (node, flags).
804 Returns a 2-tuple of (node, flags).
788
805
789 Raises ``KeyError`` if the path does not exist in the manifest.
806 Raises ``KeyError`` if the path does not exist in the manifest.
790 """
807 """
791
808
792 def __len__():
809 def __len__():
793 """Return the number of entries in the manifest."""
810 """Return the number of entries in the manifest."""
794
811
795 def __nonzero__():
812 def __nonzero__():
796 """Returns True if the manifest has entries, False otherwise."""
813 """Returns True if the manifest has entries, False otherwise."""
797
814
798 __bool__ = __nonzero__
815 __bool__ = __nonzero__
799
816
800 def __setitem__(path, node):
817 def __setitem__(path, node):
801 """Define the node value for a path in the manifest.
818 """Define the node value for a path in the manifest.
802
819
803 If the path is already in the manifest, its flags will be copied to
820 If the path is already in the manifest, its flags will be copied to
804 the new entry.
821 the new entry.
805 """
822 """
806
823
807 def __contains__(path):
824 def __contains__(path):
808 """Whether a path exists in the manifest."""
825 """Whether a path exists in the manifest."""
809
826
810 def __delitem__(path):
827 def __delitem__(path):
811 """Remove a path from the manifest.
828 """Remove a path from the manifest.
812
829
813 Raises ``KeyError`` if the path is not in the manifest.
830 Raises ``KeyError`` if the path is not in the manifest.
814 """
831 """
815
832
816 def __iter__():
833 def __iter__():
817 """Iterate over paths in the manifest."""
834 """Iterate over paths in the manifest."""
818
835
819 def iterkeys():
836 def iterkeys():
820 """Iterate over paths in the manifest."""
837 """Iterate over paths in the manifest."""
821
838
822 def keys():
839 def keys():
823 """Obtain a list of paths in the manifest."""
840 """Obtain a list of paths in the manifest."""
824
841
825 def filesnotin(other, match=None):
842 def filesnotin(other, match=None):
826 """Obtain the set of paths in this manifest but not in another.
843 """Obtain the set of paths in this manifest but not in another.
827
844
828 ``match`` is an optional matcher function to be applied to both
845 ``match`` is an optional matcher function to be applied to both
829 manifests.
846 manifests.
830
847
831 Returns a set of paths.
848 Returns a set of paths.
832 """
849 """
833
850
834 def dirs():
851 def dirs():
835 """Returns an object implementing the ``idirs`` interface."""
852 """Returns an object implementing the ``idirs`` interface."""
836
853
837 def hasdir(dir):
854 def hasdir(dir):
838 """Returns a bool indicating if a directory is in this manifest."""
855 """Returns a bool indicating if a directory is in this manifest."""
839
856
840 def matches(match):
857 def matches(match):
841 """Generate a new manifest filtered through a matcher.
858 """Generate a new manifest filtered through a matcher.
842
859
843 Returns an object conforming to the ``imanifestdict`` interface.
860 Returns an object conforming to the ``imanifestdict`` interface.
844 """
861 """
845
862
846 def walk(match):
863 def walk(match):
847 """Generator of paths in manifest satisfying a matcher.
864 """Generator of paths in manifest satisfying a matcher.
848
865
849 This is equivalent to ``self.matches(match).iterkeys()`` except a new
866 This is equivalent to ``self.matches(match).iterkeys()`` except a new
850 manifest object is not created.
867 manifest object is not created.
851
868
852 If the matcher has explicit files listed and they don't exist in
869 If the matcher has explicit files listed and they don't exist in
853 the manifest, ``match.bad()`` is called for each missing file.
870 the manifest, ``match.bad()`` is called for each missing file.
854 """
871 """
855
872
856 def diff(other, match=None, clean=False):
873 def diff(other, match=None, clean=False):
857 """Find differences between this manifest and another.
874 """Find differences between this manifest and another.
858
875
859 This manifest is compared to ``other``.
876 This manifest is compared to ``other``.
860
877
861 If ``match`` is provided, the two manifests are filtered against this
878 If ``match`` is provided, the two manifests are filtered against this
862 matcher and only entries satisfying the matcher are compared.
879 matcher and only entries satisfying the matcher are compared.
863
880
864 If ``clean`` is True, unchanged files are included in the returned
881 If ``clean`` is True, unchanged files are included in the returned
865 object.
882 object.
866
883
867 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
884 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
868 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
885 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
869 represents the node and flags for this manifest and ``(node2, flag2)``
886 represents the node and flags for this manifest and ``(node2, flag2)``
870 are the same for the other manifest.
887 are the same for the other manifest.
871 """
888 """
872
889
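A sketch of consuming the return value; it assumes a ``None`` node on
one side means the path is absent from that manifest::

    for path, ((node1, flag1), (node2, flag2)) in m1.diff(m2).items():
        if node1 is None:
            pass        # path only present in the other manifest
        elif node2 is None:
            pass        # path only present in this manifest
        else:
            pass        # present in both; content or flags differ
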
873 def setflag(path, flag):
890 def setflag(path, flag):
874 """Set the flag value for a given path.
891 """Set the flag value for a given path.
875
892
876 Raises ``KeyError`` if the path is not already in the manifest.
893 Raises ``KeyError`` if the path is not already in the manifest.
877 """
894 """
878
895
879 def get(path, default=None):
896 def get(path, default=None):
880 """Obtain the node value for a path or a default value if missing."""
897 """Obtain the node value for a path or a default value if missing."""
881
898
882 def flags(path, default=''):
899 def flags(path, default=''):
883 """Return the flags value for a path or a default value if missing."""
900 """Return the flags value for a path or a default value if missing."""
884
901
885 def copy():
902 def copy():
886 """Return a copy of this manifest."""
903 """Return a copy of this manifest."""
887
904
888 def items():
905 def items():
889 """Returns an iterable of (path, node) for items in this manifest."""
906 """Returns an iterable of (path, node) for items in this manifest."""
890
907
891 def iteritems():
908 def iteritems():
892 """Identical to items()."""
909 """Identical to items()."""
893
910
894 def iterentries():
911 def iterentries():
895 """Returns an iterable of (path, node, flags) for this manifest.
912 """Returns an iterable of (path, node, flags) for this manifest.
896
913
897 Similar to ``iteritems()`` except items are a 3-tuple and include
914 Similar to ``iteritems()`` except items are a 3-tuple and include
898 flags.
915 flags.
899 """
916 """
900
917
901 def text():
918 def text():
902 """Obtain the raw data representation for this manifest.
919 """Obtain the raw data representation for this manifest.
903
920
904 Result is used to create a manifest revision.
921 Result is used to create a manifest revision.
905 """
922 """
906
923
907 def fastdelta(base, changes):
924 def fastdelta(base, changes):
908 """Obtain a delta between this manifest and another given changes.
925 """Obtain a delta between this manifest and another given changes.
909
926
910 ``base`` is the raw data representation for another manifest.
927 ``base`` is the raw data representation for another manifest.
911
928
912 ``changes`` is an iterable of ``(path, to_delete)``.
929 ``changes`` is an iterable of ``(path, to_delete)``.
913
930
914 Returns a 2-tuple containing ``bytearray(self.text())`` and the
931 Returns a 2-tuple containing ``bytearray(self.text())`` and the
915 delta between ``base`` and this manifest.
932 delta between ``base`` and this manifest.
916 """
933 """
917
934
918 class imanifestrevisionbase(interfaceutil.Interface):
935 class imanifestrevisionbase(interfaceutil.Interface):
919 """Base interface representing a single revision of a manifest.
936 """Base interface representing a single revision of a manifest.
920
937
921 Should not be used as a primary interface: should always be inherited
938 Should not be used as a primary interface: should always be inherited
922 as part of a larger interface.
939 as part of a larger interface.
923 """
940 """
924
941
925 def new():
942 def new():
926 """Obtain a new manifest instance.
943 """Obtain a new manifest instance.
927
944
928 Returns an object conforming to the ``imanifestrevisionwritable``
945 Returns an object conforming to the ``imanifestrevisionwritable``
929 interface. The instance will be associated with the same
946 interface. The instance will be associated with the same
930 ``imanifestlog`` collection as this instance.
947 ``imanifestlog`` collection as this instance.
931 """
948 """
932
949
933 def copy():
950 def copy():
934 """Obtain a copy of this manifest instance.
951 """Obtain a copy of this manifest instance.
935
952
936 Returns an object conforming to the ``imanifestrevisionwritable``
953 Returns an object conforming to the ``imanifestrevisionwritable``
937 interface. The instance will be associated with the same
954 interface. The instance will be associated with the same
938 ``imanifestlog`` collection as this instance.
955 ``imanifestlog`` collection as this instance.
939 """
956 """
940
957
941 def read():
958 def read():
942 """Obtain the parsed manifest data structure.
959 """Obtain the parsed manifest data structure.
943
960
944 The returned object conforms to the ``imanifestdict`` interface.
961 The returned object conforms to the ``imanifestdict`` interface.
945 """
962 """
946
963
947 class imanifestrevisionstored(imanifestrevisionbase):
964 class imanifestrevisionstored(imanifestrevisionbase):
948 """Interface representing a manifest revision committed to storage."""
965 """Interface representing a manifest revision committed to storage."""
949
966
950 def node():
967 def node():
951 """The binary node for this manifest."""
968 """The binary node for this manifest."""
952
969
953 parents = interfaceutil.Attribute(
970 parents = interfaceutil.Attribute(
954 """List of binary nodes that are parents for this manifest revision."""
971 """List of binary nodes that are parents for this manifest revision."""
955 )
972 )
956
973
957 def readdelta(shallow=False):
974 def readdelta(shallow=False):
958 """Obtain the manifest data structure representing changes from parent.
975 """Obtain the manifest data structure representing changes from parent.
959
976
960 This manifest is compared to its 1st parent. A new manifest representing
977 This manifest is compared to its 1st parent. A new manifest representing
961 those differences is constructed.
978 those differences is constructed.
962
979
963 The returned object conforms to the ``imanifestdict`` interface.
980 The returned object conforms to the ``imanifestdict`` interface.
964 """
981 """
965
982
966 def readfast(shallow=False):
983 def readfast(shallow=False):
967 """Calls either ``read()`` or ``readdelta()``.
984 """Calls either ``read()`` or ``readdelta()``.
968
985
969 The faster of the two options is called.
986 The faster of the two options is called.
970 """
987 """
971
988
972 def find(key):
989 def find(key):
973 """Calls self.read().find(key)``.
990 """Calls self.read().find(key)``.
974
991
975 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
992 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
976 """
993 """
977
994
978 class imanifestrevisionwritable(imanifestrevisionbase):
995 class imanifestrevisionwritable(imanifestrevisionbase):
979 """Interface representing a manifest revision that can be committed."""
996 """Interface representing a manifest revision that can be committed."""
980
997
981 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
998 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
982 """Add this revision to storage.
999 """Add this revision to storage.
983
1000
984 Takes a transaction object, the changeset revision number it will
1001 Takes a transaction object, the changeset revision number it will
985 be associated with, its parent nodes, and lists of added and
1002 be associated with, its parent nodes, and lists of added and
986 removed paths.
1003 removed paths.
987
1004
988 If match is provided, storage can choose not to inspect or write out
1005 If match is provided, storage can choose not to inspect or write out
989 items that do not match. Storage is still required to be able to provide
1006 items that do not match. Storage is still required to be able to provide
990 the full manifest in the future for any directories written (these
1007 the full manifest in the future for any directories written (these
991 manifests should not be "narrowed on disk").
1008 manifests should not be "narrowed on disk").
992
1009
993 Returns the binary node of the created revision.
1010 Returns the binary node of the created revision.
994 """
1011 """
995
1012
996 class imanifeststorage(interfaceutil.Interface):
1013 class imanifeststorage(interfaceutil.Interface):
997 """Storage interface for manifest data."""
1014 """Storage interface for manifest data."""
998
1015
999 tree = interfaceutil.Attribute(
1016 tree = interfaceutil.Attribute(
1000 """The path to the directory this manifest tracks.
1017 """The path to the directory this manifest tracks.
1001
1018
1002 The empty bytestring represents the root manifest.
1019 The empty bytestring represents the root manifest.
1003 """)
1020 """)
1004
1021
1005 index = interfaceutil.Attribute(
1022 index = interfaceutil.Attribute(
1006 """An ``ifilerevisionssequence`` instance.""")
1023 """An ``ifilerevisionssequence`` instance.""")
1007
1024
1008 indexfile = interfaceutil.Attribute(
1025 indexfile = interfaceutil.Attribute(
1009 """Path of revlog index file.
1026 """Path of revlog index file.
1010
1027
1011 TODO this is revlog specific and should not be exposed.
1028 TODO this is revlog specific and should not be exposed.
1012 """)
1029 """)
1013
1030
1014 opener = interfaceutil.Attribute(
1031 opener = interfaceutil.Attribute(
1015 """VFS opener to use to access underlying files used for storage.
1032 """VFS opener to use to access underlying files used for storage.
1016
1033
1017 TODO this is revlog specific and should not be exposed.
1034 TODO this is revlog specific and should not be exposed.
1018 """)
1035 """)
1019
1036
1020 version = interfaceutil.Attribute(
1037 version = interfaceutil.Attribute(
1021 """Revlog version number.
1038 """Revlog version number.
1022
1039
1023 TODO this is revlog specific and should not be exposed.
1040 TODO this is revlog specific and should not be exposed.
1024 """)
1041 """)
1025
1042
1026 _generaldelta = interfaceutil.Attribute(
1043 _generaldelta = interfaceutil.Attribute(
1027 """Whether generaldelta storage is being used.
1044 """Whether generaldelta storage is being used.
1028
1045
1029 TODO this is revlog specific and should not be exposed.
1046 TODO this is revlog specific and should not be exposed.
1030 """)
1047 """)
1031
1048
1032 fulltextcache = interfaceutil.Attribute(
1049 fulltextcache = interfaceutil.Attribute(
1033 """Dict with cache of fulltexts.
1050 """Dict with cache of fulltexts.
1034
1051
1035 TODO this doesn't feel appropriate for the storage interface.
1052 TODO this doesn't feel appropriate for the storage interface.
1036 """)
1053 """)
1037
1054
1038 def __len__():
1055 def __len__():
1039 """Obtain the number of revisions stored for this manifest."""
1056 """Obtain the number of revisions stored for this manifest."""
1040
1057
1041 def __iter__():
1058 def __iter__():
1042 """Iterate over revision numbers for this manifest."""
1059 """Iterate over revision numbers for this manifest."""
1043
1060
1044 def rev(node):
1061 def rev(node):
1045 """Obtain the revision number given a binary node.
1062 """Obtain the revision number given a binary node.
1046
1063
1047 Raises ``error.LookupError`` if the node is not known.
1064 Raises ``error.LookupError`` if the node is not known.
1048 """
1065 """
1049
1066
1050 def node(rev):
1067 def node(rev):
1051 """Obtain the node value given a revision number.
1068 """Obtain the node value given a revision number.
1052
1069
1053 Raises ``error.LookupError`` if the revision is not known.
1070 Raises ``error.LookupError`` if the revision is not known.
1054 """
1071 """
1055
1072
1056 def lookup(value):
1073 def lookup(value):
1057 """Attempt to resolve a value to a node.
1074 """Attempt to resolve a value to a node.
1058
1075
1059 Value can be a binary node, hex node, revision number, or a bytes
1076 Value can be a binary node, hex node, revision number, or a bytes
1060 that can be converted to an integer.
1077 that can be converted to an integer.
1061
1078
1062 Raises ``error.LookupError`` if a node could not be resolved.
1079 Raises ``error.LookupError`` if a node could not be resolved.
1063
1080
1064 TODO this is only used by debug* commands and can probably be deleted
1081 TODO this is only used by debug* commands and can probably be deleted
1065 easily.
1082 easily.
1066 """
1083 """
1067
1084
1068 def parents(node):
1085 def parents(node):
1069 """Returns a 2-tuple of parent nodes for a node.
1086 """Returns a 2-tuple of parent nodes for a node.
1070
1087
1071 Values will be ``nullid`` if the parent is empty.
1088 Values will be ``nullid`` if the parent is empty.
1072 """
1089 """
1073
1090
1074 def parentrevs(rev):
1091 def parentrevs(rev):
1075 """Like parents() but operates on revision numbers."""
1092 """Like parents() but operates on revision numbers."""
1076
1093
1077 def linkrev(rev):
1094 def linkrev(rev):
1078 """Obtain the changeset revision number a revision is linked to."""
1095 """Obtain the changeset revision number a revision is linked to."""
1079
1096
1080 def revision(node, _df=None, raw=False):
1097 def revision(node, _df=None, raw=False):
1081 """Obtain fulltext data for a node."""
1098 """Obtain fulltext data for a node."""
1082
1099
1083 def revdiff(rev1, rev2):
1100 def revdiff(rev1, rev2):
1084 """Obtain a delta between two revision numbers.
1101 """Obtain a delta between two revision numbers.
1085
1102
1086 The returned data is the result of ``bdiff.bdiff()`` on the raw
1103 The returned data is the result of ``bdiff.bdiff()`` on the raw
1087 revision data.
1104 revision data.
1088 """
1105 """
1089
1106
1090 def cmp(node, fulltext):
1107 def cmp(node, fulltext):
1091 """Compare fulltext to another revision.
1108 """Compare fulltext to another revision.
1092
1109
1093 Returns True if the fulltext is different from what is stored.
1110 Returns True if the fulltext is different from what is stored.
1094 """
1111 """
1095
1112
1096 def emitrevisiondeltas(requests):
1113 def emitrevisiondeltas(requests):
1097 """Produce ``irevisiondelta`` from ``irevisiondeltarequest``s.
1114 """Produce ``irevisiondelta`` from ``irevisiondeltarequest``s.
1098
1115
1099 See the documentation for ``ifiledata`` for more.
1116 See the documentation for ``ifiledata`` for more.
1100 """
1117 """
1101
1118
1102 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
1119 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
1103 """Process a series of deltas for storage.
1120 """Process a series of deltas for storage.
1104
1121
1105 See the documentation in ``ifilemutation`` for more.
1122 See the documentation in ``ifilemutation`` for more.
1106 """
1123 """
1107
1124
1108 def getstrippoint(minlink):
1125 def getstrippoint(minlink):
1109 """Find minimum revision that must be stripped to strip a linkrev.
1126 """Find minimum revision that must be stripped to strip a linkrev.
1110
1127
1111 See the documentation in ``ifilemutation`` for more.
1128 See the documentation in ``ifilemutation`` for more.
1112 """
1129 """
1113
1130
1114 def strip(minlink, transaction):
1131 def strip(minlink, transaction):
1115 """Remove storage of items starting at a linkrev.
1132 """Remove storage of items starting at a linkrev.
1116
1133
1117 See the documentation in ``ifilemutation`` for more.
1134 See the documentation in ``ifilemutation`` for more.
1118 """
1135 """
1119
1136
1120 def checksize():
1137 def checksize():
1121 """Obtain the expected sizes of backing files.
1138 """Obtain the expected sizes of backing files.
1122
1139
1123 TODO this is used by verify and it should not be part of the interface.
1140 TODO this is used by verify and it should not be part of the interface.
1124 """
1141 """
1125
1142
1126 def files():
1143 def files():
1127 """Obtain paths that are backing storage for this manifest.
1144 """Obtain paths that are backing storage for this manifest.
1128
1145
1129 TODO this is used by verify and there should probably be a better API
1146 TODO this is used by verify and there should probably be a better API
1130 for this functionality.
1147 for this functionality.
1131 """
1148 """
1132
1149
1133 def deltaparent(rev):
1150 def deltaparent(rev):
1134 """Obtain the revision that a revision is delta'd against.
1151 """Obtain the revision that a revision is delta'd against.
1135
1152
1136 TODO delta encoding is an implementation detail of storage and should
1153 TODO delta encoding is an implementation detail of storage and should
1137 not be exposed to the storage interface.
1154 not be exposed to the storage interface.
1138 """
1155 """
1139
1156
1140 def clone(tr, dest, **kwargs):
1157 def clone(tr, dest, **kwargs):
1141 """Clone this instance to another."""
1158 """Clone this instance to another."""
1142
1159
1143 def clearcaches(clear_persisted_data=False):
1160 def clearcaches(clear_persisted_data=False):
1144 """Clear any caches associated with this instance."""
1161 """Clear any caches associated with this instance."""
1145
1162
1146 def dirlog(d):
1163 def dirlog(d):
1147 """Obtain a manifest storage instance for a tree."""
1164 """Obtain a manifest storage instance for a tree."""
1148
1165
1149 def add(m, transaction, link, p1, p2, added, removed, readtree=None,
1166 def add(m, transaction, link, p1, p2, added, removed, readtree=None,
1150 match=None):
1167 match=None):
1151 """Add a revision to storage.
1168 """Add a revision to storage.
1152
1169
1153 ``m`` is an object conforming to ``imanifestdict``.
1170 ``m`` is an object conforming to ``imanifestdict``.
1154
1171
1155 ``link`` is the linkrev revision number.
1172 ``link`` is the linkrev revision number.
1156
1173
1157 ``p1`` and ``p2`` are the parent revision numbers.
1174 ``p1`` and ``p2`` are the parent revision numbers.
1158
1175
1159 ``added`` and ``removed`` are iterables of added and removed paths,
1176 ``added`` and ``removed`` are iterables of added and removed paths,
1160 respectively.
1177 respectively.
1161
1178
1162 ``readtree`` is a function that can be used to read the child tree(s)
1179 ``readtree`` is a function that can be used to read the child tree(s)
1163 when recursively writing the full tree structure when using
1180 when recursively writing the full tree structure when using
1164 treemanifests.
1181 treemanifests.
1165
1182
1166 ``match`` is a matcher that can be used to hint to storage that not all
1183 ``match`` is a matcher that can be used to hint to storage that not all
1167 paths must be inspected; this is an optimization and can be safely
1184 paths must be inspected; this is an optimization and can be safely
1168 ignored. Note that the storage must still be able to reproduce a full
1185 ignored. Note that the storage must still be able to reproduce a full
1169 manifest including files that did not match.
1186 manifest including files that did not match.
1170 """
1187 """
1171
1188
1172 class imanifestlog(interfaceutil.Interface):
1189 class imanifestlog(interfaceutil.Interface):
1173 """Interface representing a collection of manifest snapshots.
1190 """Interface representing a collection of manifest snapshots.
1174
1191
1175 Represents the root manifest in a repository.
1192 Represents the root manifest in a repository.
1176
1193
1177 Also serves as a means to access nested tree manifests and to cache
1194 Also serves as a means to access nested tree manifests and to cache
1178 tree manifests.
1195 tree manifests.
1179 """
1196 """
1180
1197
1181 def __getitem__(node):
1198 def __getitem__(node):
1182 """Obtain a manifest instance for a given binary node.
1199 """Obtain a manifest instance for a given binary node.
1183
1200
1184 Equivalent to calling ``self.get('', node)``.
1201 Equivalent to calling ``self.get('', node)``.
1185
1202
1186 The returned object conforms to the ``imanifestrevisionstored``
1203 The returned object conforms to the ``imanifestrevisionstored``
1187 interface.
1204 interface.
1188 """
1205 """
1189
1206
1190 def get(tree, node, verify=True):
1207 def get(tree, node, verify=True):
1191 """Retrieve the manifest instance for a given directory and binary node.
1208 """Retrieve the manifest instance for a given directory and binary node.
1192
1209
1193 ``node`` always refers to the node of the root manifest (which will be
1210 ``node`` always refers to the node of the root manifest (which will be
1194 the only manifest if flat manifests are being used).
1211 the only manifest if flat manifests are being used).
1195
1212
1196 If ``tree`` is the empty string, the root manifest is returned.
1213 If ``tree`` is the empty string, the root manifest is returned.
1197 Otherwise the manifest for the specified directory will be returned
1214 Otherwise the manifest for the specified directory will be returned
1198 (requires tree manifests).
1215 (requires tree manifests).
1199
1216
1200 If ``verify`` is True, ``LookupError`` is raised if the node is not
1217 If ``verify`` is True, ``LookupError`` is raised if the node is not
1201 known.
1218 known.
1202
1219
1203 The returned object conforms to the ``imanifestrevisionstored``
1220 The returned object conforms to the ``imanifestrevisionstored``
1204 interface.
1221 interface.
1205 """
1222 """
1206
1223
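A sketch of both accessors; ``rootnode`` is assumed to be a known root
manifest node, and the second call assumes tree manifests are enabled::

    ml = repo.manifestlog
    rootmf = ml[rootnode].read()                  # root manifest
    submf = ml.get(b'dir/sub', rootnode).read()   # nested tree manifest
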
1207 def getstorage(tree):
1224 def getstorage(tree):
1208 """Retrieve an interface to storage for a particular tree.
1225 """Retrieve an interface to storage for a particular tree.
1209
1226
1210 If ``tree`` is the empty bytestring, storage for the root manifest will
1227 If ``tree`` is the empty bytestring, storage for the root manifest will
1211 be returned. Otherwise storage for a tree manifest is returned.
1228 be returned. Otherwise storage for a tree manifest is returned.
1212
1229
1213 TODO formalize interface for returned object.
1230 TODO formalize interface for returned object.
1214 """
1231 """
1215
1232
1216 def clearcaches():
1233 def clearcaches():
1217 """Clear caches associated with this collection."""
1234 """Clear caches associated with this collection."""
1218
1235
1219 def rev(node):
1236 def rev(node):
1220 """Obtain the revision number for a binary node.
1237 """Obtain the revision number for a binary node.
1221
1238
1222 Raises ``error.LookupError`` if the node is not known.
1239 Raises ``error.LookupError`` if the node is not known.
1223 """
1240 """
1224
1241
1225 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1242 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1226 """Local repository sub-interface providing access to tracked file storage.
1243 """Local repository sub-interface providing access to tracked file storage.
1227
1244
1228 This interface defines how a repository accesses storage for a single
1245 This interface defines how a repository accesses storage for a single
1229 tracked file path.
1246 tracked file path.
1230 """
1247 """
1231
1248
1232 def file(f):
1249 def file(f):
1233 """Obtain a filelog for a tracked path.
1250 """Obtain a filelog for a tracked path.
1234
1251
1235 The returned type conforms to the ``ifilestorage`` interface.
1252 The returned type conforms to the ``ifilestorage`` interface.
1236 """
1253 """
1237
1254
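For example, a minimal sketch of reading the most recent revision of a
tracked path through this interface (the path is made up and assumed
to have at least one revision)::

    fl = repo.file(b'path/to/file')
    tipnode = fl.node(len(fl) - 1)   # newest revision number -> node
    data = fl.read(tipnode)
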
1238 class ilocalrepositorymain(interfaceutil.Interface):
1255 class ilocalrepositorymain(interfaceutil.Interface):
1239 """Main interface for local repositories.
1256 """Main interface for local repositories.
1240
1257
1241 This currently captures the reality of things - not how things should be.
1258 This currently captures the reality of things - not how things should be.
1242 """
1259 """
1243
1260
1244 supportedformats = interfaceutil.Attribute(
1261 supportedformats = interfaceutil.Attribute(
1245 """Set of requirements that apply to stream clone.
1262 """Set of requirements that apply to stream clone.
1246
1263
1247 This is actually a class attribute and is shared among all instances.
1264 This is actually a class attribute and is shared among all instances.
1248 """)
1265 """)
1249
1266
1250 supported = interfaceutil.Attribute(
1267 supported = interfaceutil.Attribute(
1251 """Set of requirements that this repo is capable of opening.""")
1268 """Set of requirements that this repo is capable of opening.""")
1252
1269
1253 requirements = interfaceutil.Attribute(
1270 requirements = interfaceutil.Attribute(
1254 """Set of requirements this repo uses.""")
1271 """Set of requirements this repo uses.""")
1255
1272
1256 filtername = interfaceutil.Attribute(
1273 filtername = interfaceutil.Attribute(
1257 """Name of the repoview that is active on this repo.""")
1274 """Name of the repoview that is active on this repo.""")
1258
1275
1259 wvfs = interfaceutil.Attribute(
1276 wvfs = interfaceutil.Attribute(
1260 """VFS used to access the working directory.""")
1277 """VFS used to access the working directory.""")
1261
1278
1262 vfs = interfaceutil.Attribute(
1279 vfs = interfaceutil.Attribute(
1263 """VFS rooted at the .hg directory.
1280 """VFS rooted at the .hg directory.
1264
1281
1265 Used to access repository data not in the store.
1282 Used to access repository data not in the store.
1266 """)
1283 """)
1267
1284
1268 svfs = interfaceutil.Attribute(
1285 svfs = interfaceutil.Attribute(
1269 """VFS rooted at the store.
1286 """VFS rooted at the store.
1270
1287
1271 Used to access repository data in the store. Typically .hg/store.
1288 Used to access repository data in the store. Typically .hg/store.
1272 But can point elsewhere if the store is shared.
1289 But can point elsewhere if the store is shared.
1273 """)
1290 """)
1274
1291
1275 root = interfaceutil.Attribute(
1292 root = interfaceutil.Attribute(
1276 """Path to the root of the working directory.""")
1293 """Path to the root of the working directory.""")
1277
1294
1278 path = interfaceutil.Attribute(
1295 path = interfaceutil.Attribute(
1279 """Path to the .hg directory.""")
1296 """Path to the .hg directory.""")
1280
1297
1281 origroot = interfaceutil.Attribute(
1298 origroot = interfaceutil.Attribute(
1282 """The filesystem path that was used to construct the repo.""")
1299 """The filesystem path that was used to construct the repo.""")
1283
1300
1284 auditor = interfaceutil.Attribute(
1301 auditor = interfaceutil.Attribute(
1285 """A pathauditor for the working directory.
1302 """A pathauditor for the working directory.
1286
1303
1287 This checks if a path refers to a nested repository.
1304 This checks if a path refers to a nested repository.
1288
1305
1289 Operates on the filesystem.
1306 Operates on the filesystem.
1290 """)
1307 """)
1291
1308
1292 nofsauditor = interfaceutil.Attribute(
1309 nofsauditor = interfaceutil.Attribute(
1293 """A pathauditor for the working directory.
1310 """A pathauditor for the working directory.
1294
1311
1295 This is like ``auditor`` except it doesn't do filesystem checks.
1312 This is like ``auditor`` except it doesn't do filesystem checks.
1296 """)
1313 """)
1297
1314
1298 baseui = interfaceutil.Attribute(
1315 baseui = interfaceutil.Attribute(
1299 """Original ui instance passed into constructor.""")
1316 """Original ui instance passed into constructor.""")
1300
1317
1301 ui = interfaceutil.Attribute(
1318 ui = interfaceutil.Attribute(
1302 """Main ui instance for this instance.""")
1319 """Main ui instance for this instance.""")
1303
1320
    sharedpath = interfaceutil.Attribute(
        """Path to the .hg directory of the repo this repo was shared from.""")

    store = interfaceutil.Attribute(
        """A store instance.""")

    spath = interfaceutil.Attribute(
        """Path to the store.""")

    sjoin = interfaceutil.Attribute(
        """Alias to self.store.join.""")

    cachevfs = interfaceutil.Attribute(
        """A VFS used to access the cache directory.

        Typically .hg/cache.
        """)

    filteredrevcache = interfaceutil.Attribute(
        """Holds sets of revisions to be filtered.""")

    names = interfaceutil.Attribute(
        """A ``namespaces`` instance.""")

    def close():
        """Close the handle on this repository."""

    def peer():
        """Obtain an object conforming to the ``peer`` interface."""

    def unfiltered():
        """Obtain an unfiltered/raw view of this repo."""

    def filtered(name, visibilityexceptions=None):
        """Obtain a named view of this repository."""

    obsstore = interfaceutil.Attribute(
        """A store of obsolescence data.""")

    changelog = interfaceutil.Attribute(
        """A handle on the changelog revlog.""")

    manifestlog = interfaceutil.Attribute(
        """An instance conforming to the ``imanifestlog`` interface.

        Provides access to manifests for the repository.
        """)

    dirstate = interfaceutil.Attribute(
        """Working directory state.""")

    narrowpats = interfaceutil.Attribute(
        """Matcher patterns for this repository's narrowspec.""")

    def narrowmatch():
        """Obtain a matcher for the narrowspec."""

    def setnarrowpats(newincludes, newexcludes):
        """Define the narrowspec for this repository."""

    def __getitem__(changeid):
        """Try to resolve a changectx."""

    def __contains__(changeid):
        """Whether a changeset exists."""

    def __nonzero__():
        """Always returns True."""
        return True

    __bool__ = __nonzero__

    def __len__():
        """Returns the number of changesets in the repo."""

    def __iter__():
        """Iterate over revisions in the changelog."""

    def revs(expr, *args):
        """Evaluate a revset.

        Emits revisions.
        """

    def set(expr, *args):
        """Evaluate a revset.

        Emits changectx instances.
        """

    def anyrevs(specs, user=False, localalias=None):
        """Find revisions matching one of the given revsets."""

    def url():
        """Returns a string representing the location of this repo."""

    def hook(name, throw=False, **args):
        """Call a hook."""

    def tags():
        """Return a mapping of tag to node."""

    def tagtype(tagname):
        """Return the type of a given tag."""

    def tagslist():
        """Return a list of tags ordered by revision."""

    def nodetags(node):
        """Return the tags associated with a node."""

    def nodebookmarks(node):
        """Return the list of bookmarks pointing to the specified node."""

    def branchmap():
        """Return a mapping of branch to heads in that branch."""

    def revbranchcache():
        """Obtain the "rev branch cache" used to speed up branch lookups."""

    def branchtip(branch, ignoremissing=False):
        """Return the tip node for a given branch."""

    def lookup(key):
        """Resolve the node for a revision."""

    def lookupbranch(key):
        """Look up the branch name of the given revision or branch name."""

    def known(nodes):
        """Determine whether a series of nodes is known.

        Returns a list of bools.
        """

    def local():
        """Whether the repository is local."""
        return True

    def publishing():
        """Whether the repository is a publishing repository."""

    def cancopy():
        """Whether the repository can be copied at the filesystem level."""

    def shared():
        """The type of shared repository or None."""

    def wjoin(f, *insidef):
        """Calls self.vfs.reljoin(self.root, f, *insidef)"""

    def setparents(p1, p2):
        """Set the parent nodes of the working directory."""

    def filectx(path, changeid=None, fileid=None):
        """Obtain a filectx for the given file revision."""

    def getcwd():
        """Obtain the current working directory from the dirstate."""

    def pathto(f, cwd=None):
        """Obtain the relative path to a file."""

    def adddatafilter(name, fltr):
        """Register a data filter applied by ``wread``/``wwrite``."""

    def wread(filename):
        """Read a file from wvfs, using data filters."""

    def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
        """Write data to a file in the wvfs, using data filters."""

    def wwritedata(filename, data):
        """Resolve data for writing to the wvfs, using data filters."""

    def currenttransaction():
        """Obtain the current transaction instance or None."""

    def transaction(desc, report=None):
        """Open a new transaction to write to the repository."""

    def undofiles():
        """Returns a list of (vfs, path) for files to undo transactions."""

    def recover():
        """Roll back an interrupted transaction."""

    def rollback(dryrun=False, force=False):
        """Undo the last transaction.

        DANGEROUS.
        """

    def updatecaches(tr=None, full=False):
        """Warm repo caches."""

    def invalidatecaches():
        """Invalidate cached data due to the repository mutating."""

    def invalidatevolatilesets():
        """Invalidate cached "volatile" revision sets (e.g. obsolescence)."""

    def invalidatedirstate():
        """Invalidate the dirstate."""

    def invalidate(clearfilecache=False):
        """Invalidate cached state derived from the store."""

    def invalidateall():
        """Invalidate all cached repository state, including the dirstate."""

    def lock(wait=True):
        """Lock the repository store and return a lock instance."""

    def wlock(wait=True):
        """Lock the non-store parts of the repository."""

    def currentwlock():
        """Return the wlock if it's held or None."""

    def checkcommitpatterns(wctx, vdirs, match, status, fail):
        """Validate the file patterns given to an in-progress commit."""

    def commit(text='', user=None, date=None, match=None, force=False,
               editor=False, extra=None):
        """Add a new revision to the repository."""

    def commitctx(ctx, error=False):
        """Commit a commitctx instance to the repository."""

    def destroying():
        """Inform the repository that nodes are about to be destroyed."""

    def destroyed():
        """Inform the repository that nodes have been destroyed."""

    def status(node1='.', node2=None, match=None, ignored=False,
               clean=False, unknown=False, listsubrepos=False):
        """Convenience method to call repo[x].status()."""

    def addpostdsstatus(ps):
        """Register a callback to run after a dirstate status operation."""

    def postdsstatus():
        """Return the registered post-dirstate-status callbacks."""

    def clearpostdsstatus():
        """Clear the registered post-dirstate-status callbacks."""

    def heads(start=None):
        """Obtain list of nodes that are DAG heads."""

    def branchheads(branch=None, start=None, closed=False):
        """Obtain the head nodes of a branch."""

    def branches(nodes):
        """Obtain branch information for a set of nodes (legacy helper)."""

    def between(pairs):
        """Obtain nodes between pairs of nodes (legacy wire protocol helper)."""

    def checkpush(pushop):
        """Validate an outgoing push operation before it begins."""

    prepushoutgoinghooks = interfaceutil.Attribute(
        """util.hooks instance.""")

    def pushkey(namespace, key, old, new):
        """Update a value in a pushkey namespace; returns whether it succeeded."""

    def listkeys(namespace):
        """Return the keys and values in a pushkey namespace."""

    def debugwireargs(one, two, three=None, four=None, five=None):
        """Echo arguments back; used by the ``debugwireargs`` test command."""

    def savecommitmessage(text):
        """Save a commit message (``.hg/last-message.txt``) for later recovery."""

class completelocalrepository(ilocalrepositorymain,
                              ilocalrepositoryfilestorage):
    """Complete interface for a local repository."""
@@ -1,2494 +1,2583 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import hashlib
import os
import re
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes complaints.
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

_mdre = re.compile('\1\n')
def parsemeta(text):
    """return (metadatadict, metadatasize)"""
    # text can be buffer, so we can't use .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    s = _mdre.search(text, 2).start()
    mtext = text[2:s]
    meta = {}
    for l in mtext.splitlines():
        k, v = l.split(": ", 1)
        meta[k] = v
    return meta, (s + 2)

def packmeta(meta, text):
    keys = sorted(meta)
    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
    return "\1\n%s\1\n%s" % (metatext, text)

def _censoredtext(text):
    m, offs = parsemeta(text)
    return m and "censored" in m

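# Editor's note (illustrative, not part of this changeset): the metadata
# envelope built by ``packmeta`` and parsed back by ``parsemeta``; a censored
# revision is recognized by ``_censoredtext`` through its "censored" key:
#
#     packmeta({'censored': 'tombstone'}, 'body')
#     # -> '\x01\ncensored: tombstone\n\x01\nbody'
#     parsemeta('\x01\ncensored: tombstone\n\x01\nbody')
#     # -> ({'censored': 'tombstone'}, 24)
#     _censoredtext(packmeta({'censored': 'x'}, ''))
#     # -> True
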
def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate the hash. If
      "write" returns True, then the "rawtext" returned by "write" should be
      used to generate the hash. Usually, "write" and "read" return different
      booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

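# Editor's note (illustrative sketch, not part of this changeset): registering
# a processor triple on the real REVIDX_EXTSTORED flag. The three callables
# below are hypothetical; their booleans follow the protocol documented above
# (the hash covers the user-visible text, so the stored rawtext alone cannot
# validate it):
#
#     def _extread(rl, rawtext):
#         return _resolveexternal(rawtext), True   # text usable for hashing
#
#     def _extwrite(rl, text):
#         return _storeexternal(text), False       # hash from "text", not raw
#
#     def _extraw(rl, rawtext):
#         return False                             # rawtext can't verify hash
#
#     addflagprocessor(REVIDX_EXTSTORED, (_extread, _extwrite, _extraw))
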
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

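# Editor's note (illustrative, not part of this changeset): the first field of
# an index entry packs a 48-bit file offset with 16 bits of flags, so the
# helpers above round-trip:
#
#     v = offset_type(1048576, REVIDX_ISCENSORED)
#     getoffset(v) == 1048576 and gettype(v) == REVIDX_ISCENSORED  # -> True
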
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

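# Editor's note (illustrative, not part of this changeset): equivalently,
# hash(text, p1, p2) is sha1(min(p1, p2) + max(p1, p2) + text); with a null
# second parent this reduces to
#
#     hash(text, p1, nullid) == hashlib.sha1(nullid + p1 + text).digest()
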
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True, frozen=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    linknode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

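# Editor's note: ">4l20s20s20s" is four 4-byte signed ints plus three 20-byte
# nodeids, so indexformatv0.size is 76 bytes per v0 index record.
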
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

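# Editor's note: unlike a plain list, indexing revlogoldindex with -1 returns
# a synthetic entry for nullrev rather than the last element, so callers can
# uniformly look up ``index[rev]`` even for the null revision.
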
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

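# Editor's note: ">Qiiiiii20s12x" packs to 64 bytes per record; the "32 bytes:
# nodeid" above is a 20-byte nodeid padded with 12 zero bytes ("12x") to leave
# room for longer hashes. For rev 0, packentry() in revlogio below overlays
# the 4-byte version header on the first field.
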
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'deltabothparents' in opts:
                self._deltabothparents = opts['deltabothparents']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._sparserevlog = bool(opts.get('sparse-revlog', False))
            withsparseread = bool(opts.get('with-sparse-read', False))
            # sparse-revlog forces sparse-read
            self._withsparseread = self._sparserevlog or withsparseread
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']
            if opts.get('enableellipsis'):
                self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

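        # Editor's note (illustrative, not part of this changeset): the opts
        # consulted above come from the opener; a modern repository would
        # typically provide something like
        #
        #     opener.options = {'revlogv1': True, 'generaldelta': True,
        #                       'sparse-revlog': True}
        #
        # which yields version REVLOGV1 | FLAG_GENERALDELTA with sparse
        # reads enabled.
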
        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))
        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        length = len(self)
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
            if stop > length:
                stop = length
        else:
            stop = length
        return pycompat.xrange(start, stop, step)

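    # Editor's note (illustrative, not part of this changeset): ``revs`` is
    # inclusive of ``stop`` and supports descending iteration; for a revlog
    # ``rl`` (a hypothetical instance) with at least five revisions,
    # list(rl.revs(2, 4)) == [2, 3, 4] and list(rl.revs(4, 2)) == [4, 3, 2].
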
    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

708 # Derived from index values.
708 # Derived from index values.
709
709
710 def end(self, rev):
710 def end(self, rev):
711 return self.start(rev) + self.length(rev)
711 return self.start(rev) + self.length(rev)
712
712
713 def parents(self, node):
713 def parents(self, node):
714 i = self.index
714 i = self.index
715 d = i[self.rev(node)]
715 d = i[self.rev(node)]
716 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
716 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
717
717
718 def chainlen(self, rev):
718 def chainlen(self, rev):
719 return self._chaininfo(rev)[0]
719 return self._chaininfo(rev)[0]
720
720
721 def _chaininfo(self, rev):
721 def _chaininfo(self, rev):
722 chaininfocache = self._chaininfocache
722 chaininfocache = self._chaininfocache
723 if rev in chaininfocache:
723 if rev in chaininfocache:
724 return chaininfocache[rev]
724 return chaininfocache[rev]
725 index = self.index
725 index = self.index
726 generaldelta = self._generaldelta
726 generaldelta = self._generaldelta
727 iterrev = rev
727 iterrev = rev
728 e = index[iterrev]
728 e = index[iterrev]
729 clen = 0
729 clen = 0
730 compresseddeltalen = 0
730 compresseddeltalen = 0
731 while iterrev != e[3]:
731 while iterrev != e[3]:
732 clen += 1
732 clen += 1
733 compresseddeltalen += e[1]
733 compresseddeltalen += e[1]
734 if generaldelta:
734 if generaldelta:
735 iterrev = e[3]
735 iterrev = e[3]
736 else:
736 else:
737 iterrev -= 1
737 iterrev -= 1
738 if iterrev in chaininfocache:
738 if iterrev in chaininfocache:
739 t = chaininfocache[iterrev]
739 t = chaininfocache[iterrev]
740 clen += t[0]
740 clen += t[0]
741 compresseddeltalen += t[1]
741 compresseddeltalen += t[1]
742 break
742 break
743 e = index[iterrev]
743 e = index[iterrev]
744 else:
744 else:
745 # Add text length of base since decompressing that also takes
745 # Add text length of base since decompressing that also takes
746 # work. For cache hits the length is already included.
746 # work. For cache hits the length is already included.
747 compresseddeltalen += e[1]
747 compresseddeltalen += e[1]
748 r = (clen, compresseddeltalen)
748 r = (clen, compresseddeltalen)
749 chaininfocache[rev] = r
749 chaininfocache[rev] = r
750 return r
750 return r
751
751
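# Same accounting as _chaininfo(), sketched without the memo cache on a
# toy index of (compressedlen, base) pairs with generaldelta-style base
# pointers (hypothetical helper):
def toy_chaininfo(index, rev):
    clen, compresseddeltalen = 0, 0
    while index[rev][1] != rev:
        clen += 1
        compresseddeltalen += index[rev][0]
        rev = index[rev][1]
    # the base text has to be decompressed too, so count it as well
    compresseddeltalen += index[rev][0]
    return clen, compresseddeltalen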
752 def _deltachain(self, rev, stoprev=None):
753 """Obtain the delta chain for a revision.
754
755 ``stoprev`` specifies a revision to stop at. If not specified, we
756 stop at the base of the chain.
757
758 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
759 revs in ascending order and ``stopped`` is a bool indicating whether
760 ``stoprev`` was hit.
761 """
762 # Try C implementation.
763 try:
764 return self.index.deltachain(rev, stoprev, self._generaldelta)
765 except AttributeError:
766 pass
767
768 chain = []
769
770 # Alias to prevent attribute lookup in tight loop.
771 index = self.index
772 generaldelta = self._generaldelta
773
774 iterrev = rev
775 e = index[iterrev]
776 while iterrev != e[3] and iterrev != stoprev:
777 chain.append(iterrev)
778 if generaldelta:
779 iterrev = e[3]
780 else:
781 iterrev -= 1
782 e = index[iterrev]
783
784 if iterrev == stoprev:
785 stopped = True
786 else:
787 chain.append(iterrev)
788 stopped = False
789
790 chain.reverse()
791 return chain, stopped
792
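# The pure-Python fallback above, modeled standalone for the
# generaldelta case on the same toy (base,) index (hypothetical helper;
# in the pre-generaldelta layout the next link is simply rev - 1):
def toy_deltachain(index, rev, stoprev=None):
    chain = []
    while index[rev][0] != rev and rev != stoprev:
        chain.append(rev)
        rev = index[rev][0]
    stopped = rev == stoprev
    if not stopped:
        chain.append(rev)
    chain.reverse()
    return chain, stopped

# toy_deltachain([(0,), (0,), (1,)], 2) == ([0, 1, 2], False)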
793 def ancestors(self, revs, stoprev=0, inclusive=False):
794 """Generate the ancestors of 'revs' in reverse topological order.
795 Does not generate revs lower than stoprev.
796
797 See the documentation for ancestor.lazyancestors for more details."""
798
799 return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
800 inclusive=inclusive)
801
802 def descendants(self, revs):
803 """Generate the descendants of 'revs' in revision order.
804
805 Yield a sequence of revision numbers starting with a child of
806 some rev in revs, i.e., each revision is *not* considered a
807 descendant of itself. Results are ordered by revision number (a
808 topological sort)."""
809 first = min(revs)
810 if first == nullrev:
811 for i in self:
812 yield i
813 return
814
815 seen = set(revs)
816 for i in self.revs(start=first + 1):
817 for x in self.parentrevs(i):
818 if x != nullrev and x in seen:
819 seen.add(i)
820 yield i
821 break
822
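# The single-pass scan above relies on revision numbers being a
# topological order: by the time rev i is visited, every ancestor of i
# has a smaller number. A sketch over a plain rev -> (p1, p2) parent
# table, with -1 standing in for nullrev (hypothetical helper):
def toy_descendants(parents, revs):
    seen = set(revs)
    for i in range(min(revs) + 1, len(parents)):
        if any(p in seen for p in parents[i] if p != -1):
            seen.add(i)
            yield i

# list(toy_descendants([(-1, -1), (0, -1), (1, -1)], [0])) == [1, 2]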
823 def findcommonmissing(self, common=None, heads=None):
824 """Return a tuple of the ancestors of common and the ancestors of heads
825 that are not ancestors of common. In revset terminology, we return the
826 tuple:
827
828 ::common, (::heads) - (::common)
829
830 The list is sorted by revision number, meaning it is
831 topologically sorted.
832
833 'heads' and 'common' are both lists of node IDs. If heads is
834 not supplied, uses all of the revlog's heads. If common is not
835 supplied, uses nullid."""
836 if common is None:
837 common = [nullid]
838 if heads is None:
839 heads = self.heads()
840
841 common = [self.rev(n) for n in common]
842 heads = [self.rev(n) for n in heads]
843
844 # we want the ancestors, but inclusive
845 class lazyset(object):
846 def __init__(self, lazyvalues):
847 self.addedvalues = set()
848 self.lazyvalues = lazyvalues
849
850 def __contains__(self, value):
851 return value in self.addedvalues or value in self.lazyvalues
852
853 def __iter__(self):
854 added = self.addedvalues
855 for r in added:
856 yield r
857 for r in self.lazyvalues:
858 if r not in added:
859 yield r
860
861 def add(self, value):
862 self.addedvalues.add(value)
863
864 def update(self, values):
865 self.addedvalues.update(values)
866
867 has = lazyset(self.ancestors(common))
868 has.add(nullrev)
869 has.update(common)
870
871 # take all ancestors from heads that aren't in has
872 missing = set()
873 visit = collections.deque(r for r in heads if r not in has)
874 while visit:
875 r = visit.popleft()
876 if r in missing:
877 continue
878 else:
879 missing.add(r)
880 for p in self.parentrevs(r):
881 if p not in has:
882 visit.append(p)
883 missing = list(missing)
884 missing.sort()
885 return has, [self.node(miss) for miss in missing]
886
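# The "missing" computation above is a plain BFS from the heads that
# refuses to cross into anything reachable from common; standalone
# sketch (hypothetical helper; parents maps rev -> (p1, p2), has is the
# set of revs to stop at, including -1 for nullrev):
import collections

def toy_missing(parents, has, heads):
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        visit.extend(p for p in parents[r] if p not in has)
    return sorted(missing)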
887 def incrementalmissingrevs(self, common=None):
888 """Return an object that can be used to incrementally compute the
889 revision numbers of the ancestors of arbitrary sets that are not
890 ancestors of common. This is an ancestor.incrementalmissingancestors
891 object.
892
893 'common' is a list of revision numbers. If common is not supplied, uses
894 nullrev.
895 """
896 if common is None:
897 common = [nullrev]
898
899 return ancestor.incrementalmissingancestors(self.parentrevs, common)
900
901 def findmissingrevs(self, common=None, heads=None):
902 """Return the revision numbers of the ancestors of heads that
903 are not ancestors of common.
904
905 More specifically, return a list of revision numbers corresponding to
906 nodes N such that every N satisfies the following constraints:
907
908 1. N is an ancestor of some node in 'heads'
909 2. N is not an ancestor of any node in 'common'
910
911 The list is sorted by revision number, meaning it is
912 topologically sorted.
913
914 'heads' and 'common' are both lists of revision numbers. If heads is
915 not supplied, uses all of the revlog's heads. If common is not
916 supplied, uses nullrev."""
917 if common is None:
918 common = [nullrev]
919 if heads is None:
920 heads = self.headrevs()
921
922 inc = self.incrementalmissingrevs(common=common)
923 return inc.missingancestors(heads)
924
925 def findmissing(self, common=None, heads=None):
926 """Return the ancestors of heads that are not ancestors of common.
927
928 More specifically, return a list of nodes N such that every N
929 satisfies the following constraints:
930
931 1. N is an ancestor of some node in 'heads'
932 2. N is not an ancestor of any node in 'common'
933
934 The list is sorted by revision number, meaning it is
935 topologically sorted.
936
937 'heads' and 'common' are both lists of node IDs. If heads is
938 not supplied, uses all of the revlog's heads. If common is not
939 supplied, uses nullid."""
940 if common is None:
941 common = [nullid]
942 if heads is None:
943 heads = self.heads()
944
945 common = [self.rev(n) for n in common]
946 heads = [self.rev(n) for n in heads]
947
948 inc = self.incrementalmissingrevs(common=common)
949 return [self.node(r) for r in inc.missingancestors(heads)]
950
951 def nodesbetween(self, roots=None, heads=None):
952 """Return a topological path from 'roots' to 'heads'.
953
954 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
955 topologically sorted list of all nodes N that satisfy both of
956 these constraints:
957
958 1. N is a descendant of some node in 'roots'
959 2. N is an ancestor of some node in 'heads'
960
961 Every node is considered to be both a descendant and an ancestor
962 of itself, so every reachable node in 'roots' and 'heads' will be
963 included in 'nodes'.
964
965 'outroots' is the list of reachable nodes in 'roots', i.e., the
966 subset of 'roots' that is returned in 'nodes'. Likewise,
967 'outheads' is the subset of 'heads' that is also in 'nodes'.
968
969 'roots' and 'heads' are both lists of node IDs. If 'roots' is
970 unspecified, uses nullid as the only root. If 'heads' is
971 unspecified, uses list of all of the revlog's heads."""
972 nonodes = ([], [], [])
973 if roots is not None:
974 roots = list(roots)
975 if not roots:
976 return nonodes
977 lowestrev = min([self.rev(n) for n in roots])
978 else:
979 roots = [nullid] # Everybody's a descendant of nullid
980 lowestrev = nullrev
981 if (lowestrev == nullrev) and (heads is None):
982 # We want _all_ the nodes!
983 return ([self.node(r) for r in self], [nullid], list(self.heads()))
984 if heads is None:
985 # All nodes are ancestors, so the latest ancestor is the last
986 # node.
987 highestrev = len(self) - 1
988 # Set ancestors to None to signal that every node is an ancestor.
989 ancestors = None
990 # Set heads to an empty dictionary for later discovery of heads
991 heads = {}
992 else:
993 heads = list(heads)
994 if not heads:
995 return nonodes
996 ancestors = set()
997 # Turn heads into a dictionary so we can remove 'fake' heads.
998 # Also, later we will be using it to filter out the heads we can't
999 # find from roots.
1000 heads = dict.fromkeys(heads, False)
1001 # Start at the top and keep marking parents until we're done.
1002 nodestotag = set(heads)
1003 # Remember where the top was so we can use it as a limit later.
1004 highestrev = max([self.rev(n) for n in nodestotag])
1005 while nodestotag:
1006 # grab a node to tag
1007 n = nodestotag.pop()
1008 # Never tag nullid
1009 if n == nullid:
1010 continue
1011 # A node's revision number represents its place in a
1012 # topologically sorted list of nodes.
1013 r = self.rev(n)
1014 if r >= lowestrev:
1015 if n not in ancestors:
1016 # If we are possibly a descendant of one of the roots
1017 # and we haven't already been marked as an ancestor
1018 ancestors.add(n) # Mark as ancestor
1019 # Add non-nullid parents to list of nodes to tag.
1020 nodestotag.update([p for p in self.parents(n) if
1021 p != nullid])
1022 elif n in heads: # We've seen it before, is it a fake head?
1023 # So it is, real heads should not be the ancestors of
1024 # any other heads.
1025 heads.pop(n)
1026 if not ancestors:
1027 return nonodes
1028 # Now that we have our set of ancestors, we want to remove any
1029 # roots that are not ancestors.
1030
1031 # If one of the roots was nullid, everything is included anyway.
1032 if lowestrev > nullrev:
1033 # But, since we weren't, let's recompute the lowest rev to not
1034 # include roots that aren't ancestors.
1035
1036 # Filter out roots that aren't ancestors of heads
1037 roots = [root for root in roots if root in ancestors]
1038 # Recompute the lowest revision
1039 if roots:
1040 lowestrev = min([self.rev(root) for root in roots])
1041 else:
1042 # No more roots? Return empty list
1043 return nonodes
1044 else:
1045 # We are descending from nullid, and don't need to care about
1046 # any other roots.
1047 lowestrev = nullrev
1048 roots = [nullid]
1049 # Transform our roots list into a set.
1050 descendants = set(roots)
1051 # Also, keep the original roots so we can filter out roots that aren't
1052 # 'real' roots (i.e. are descended from other roots).
1053 roots = descendants.copy()
1054 # Our topologically sorted list of output nodes.
1055 orderedout = []
1056 # Don't start at nullid since we don't want nullid in our output list,
1057 # and if nullid shows up in descendants, empty parents will look like
1058 # they're descendants.
1059 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1060 n = self.node(r)
1061 isdescendant = False
1062 if lowestrev == nullrev: # Everybody is a descendant of nullid
1063 isdescendant = True
1064 elif n in descendants:
1065 # n is already a descendant
1066 isdescendant = True
1067 # This check only needs to be done here because all the roots
1068 # will start being marked as descendants before the loop.
1069 if n in roots:
1070 # If n was a root, check if it's a 'real' root.
1071 p = tuple(self.parents(n))
1072 # If any of its parents are descendants, it's not a root.
1073 if (p[0] in descendants) or (p[1] in descendants):
1074 roots.remove(n)
1075 else:
1076 p = tuple(self.parents(n))
1077 # A node is a descendant if either of its parents are
1078 # descendants. (We seeded the descendants set with the roots
1079 # up there, remember?)
1080 if (p[0] in descendants) or (p[1] in descendants):
1081 descendants.add(n)
1082 isdescendant = True
1083 if isdescendant and ((ancestors is None) or (n in ancestors)):
1084 # Only include nodes that are both descendants and ancestors.
1085 orderedout.append(n)
1086 if (ancestors is not None) and (n in heads):
1087 # We're trying to figure out which heads are reachable
1088 # from roots.
1089 # Mark this head as having been reached
1090 heads[n] = True
1091 elif ancestors is None:
1092 # Otherwise, we're trying to discover the heads.
1093 # Assume this is a head because if it isn't, the next step
1094 # will eventually remove it.
1095 heads[n] = True
1096 # But, obviously its parents aren't.
1097 for p in self.parents(n):
1098 heads.pop(p, None)
1099 heads = [head for head, flag in heads.iteritems() if flag]
1100 roots = list(roots)
1101 assert orderedout
1102 assert roots
1103 assert heads
1104 return (orderedout, roots, heads)
1105
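# Stripped of node/rev conversions and fake-head bookkeeping,
# nodesbetween() computes the intersection of backward reachability
# from heads with forward reachability from roots; a compact rev-level
# sketch (hypothetical helper; -1 for nullrev):
def toy_revsbetween(parents, roots, heads):
    ancs, stack = set(), list(heads)
    while stack:                      # everything at or below a head
        r = stack.pop()
        if r not in ancs and r != -1:
            ancs.add(r)
            stack.extend(parents[r])
    desc, out = set(roots), []
    for r in range(min(roots), max(heads) + 1):
        if r in desc or any(p in desc for p in parents[r] if p != -1):
            desc.add(r)               # at or above a root
            if r in ancs:
                out.append(r)         # in both sets: on a path
    return out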
1106 def headrevs(self):
1107 try:
1108 return self.index.headrevs()
1109 except AttributeError:
1110 return self._headrevs()
1111
1112 def computephases(self, roots):
1113 return self.index.computephasesmapsets(roots)
1114
1115 def _headrevs(self):
1116 count = len(self)
1117 if not count:
1118 return [nullrev]
1119 # we won't iterate over filtered revs, so nobody is a head at the start
1120 ishead = [0] * (count + 1)
1121 index = self.index
1122 for r in self:
1123 ishead[r] = 1 # I may be a head
1124 e = index[r]
1125 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1126 return [r for r, val in enumerate(ishead) if val]
1127
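# _headrevs() in miniature: presume every revision is a head, then
# clear the mark for each revision's parents. The extra array slot
# absorbs writes for nullrev (-1), which is also why the real code
# sizes ishead to count + 1 (hypothetical helper):
def toy_headrevs(parents):
    ishead = [1] * len(parents) + [0]
    for p1, p2 in parents:
        ishead[p1] = ishead[p2] = 0
    return [r for r in range(len(parents)) if ishead[r]]

# toy_headrevs([(-1, -1), (0, -1), (0, -1)]) == [1, 2]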
1128 def heads(self, start=None, stop=None):
1129 """return the list of all nodes that have no children
1130
1131 if start is specified, only heads that are descendants of
1132 start will be returned
1133 if stop is specified, it will consider all the revs from stop
1134 as if they had no children
1135 """
1136 if start is None and stop is None:
1137 if not len(self):
1138 return [nullid]
1139 return [self.node(r) for r in self.headrevs()]
1140
1141 if start is None:
1142 start = nullid
1143 if stop is None:
1144 stop = []
1145 stoprevs = set([self.rev(n) for n in stop])
1146 startrev = self.rev(start)
1147 reachable = {startrev}
1148 heads = {startrev}
1149
1150 parentrevs = self.parentrevs
1151 for r in self.revs(start=startrev + 1):
1152 for p in parentrevs(r):
1153 if p in reachable:
1154 if r not in stoprevs:
1155 reachable.add(r)
1156 heads.add(r)
1157 if p in heads and p not in stoprevs:
1158 heads.remove(p)
1159
1160 return [self.node(r) for r in heads]
1161
1162 def children(self, node):
1163 """find the children of a given node"""
1164 c = []
1165 p = self.rev(node)
1166 for r in self.revs(start=p + 1):
1167 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1168 if prevs:
1169 for pr in prevs:
1170 if pr == p:
1171 c.append(self.node(r))
1172 elif p == nullrev:
1173 c.append(self.node(r))
1174 return c
1175
1176 def commonancestorsheads(self, a, b):
1177 """calculate all the heads of the common ancestors of nodes a and b"""
1178 a, b = self.rev(a), self.rev(b)
1179 ancs = self._commonancestorsheads(a, b)
1180 return pycompat.maplist(self.node, ancs)
1181
1182 def _commonancestorsheads(self, *revs):
1183 """calculate all the heads of the common ancestors of revs"""
1184 try:
1185 ancs = self.index.commonancestorsheads(*revs)
1186 except (AttributeError, OverflowError): # C implementation failed
1187 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1188 return ancs
1189
1190 def isancestor(self, a, b):
1191 """return True if node a is an ancestor of node b
1192
1193 A revision is considered an ancestor of itself."""
1194 a, b = self.rev(a), self.rev(b)
1195 return self.isancestorrev(a, b)
1196
1197 def isancestorrev(self, a, b):
1198 """return True if revision a is an ancestor of revision b
1199
1200 A revision is considered an ancestor of itself.
1201
1202 The implementation of this is trivial but the use of
1203 commonancestorsheads is not."""
1204 if a == nullrev:
1205 return True
1206 elif a == b:
1207 return True
1208 elif a > b:
1209 return False
1210 return a in self._commonancestorsheads(a, b)
1211
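# Why the final membership test is enough: when a is an ancestor of b,
# the common ancestors of {a, b} are exactly the ancestors of a, whose
# unique head is a itself, so a shows up in the heads. A brute-force
# reachability cross-check of the same predicate (hypothetical helper;
# -1 for nullrev):
def toy_isancestor(parents, a, b):
    seen, stack = set(), [b]
    while stack:
        r = stack.pop()
        if r not in seen and r != -1:
            seen.add(r)
            stack.extend(parents[r])
    return a == -1 or a in seen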
1212 def ancestor(self, a, b):
1213 """calculate the "best" common ancestor of nodes a and b"""
1214
1215 a, b = self.rev(a), self.rev(b)
1216 try:
1217 ancs = self.index.ancestors(a, b)
1218 except (AttributeError, OverflowError):
1219 ancs = ancestor.ancestors(self.parentrevs, a, b)
1220 if ancs:
1221 # choose a consistent winner when there's a tie
1222 return min(map(self.node, ancs))
1223 return nullid
1224
1225 def _match(self, id):
1226 if isinstance(id, int):
1227 # rev
1228 return self.node(id)
1229 if len(id) == 20:
1230 # possibly a binary node
1231 # odds of a binary node being all hex in ASCII are 1 in 10**25
1232 try:
1233 node = id
1234 self.rev(node) # quick search the index
1235 return node
1236 except error.LookupError:
1237 pass # may be partial hex id
1238 try:
1239 # str(rev)
1240 rev = int(id)
1241 if "%d" % rev != id:
1242 raise ValueError
1243 if rev < 0:
1244 rev = len(self) + rev
1245 if rev < 0 or rev >= len(self):
1246 raise ValueError
1247 return self.node(rev)
1248 except (ValueError, OverflowError):
1249 pass
1250 if len(id) == 40:
1251 try:
1252 # a full hex nodeid?
1253 node = bin(id)
1254 self.rev(node)
1255 return node
1256 except (TypeError, error.LookupError):
1257 pass
1258
1259 def _partialmatch(self, id):
1260 # we don't care about wdirfilenodeids as they should always be full hashes
1261 maybewdir = wdirhex.startswith(id)
1262 try:
1263 partial = self.index.partialmatch(id)
1264 if partial and self.hasnode(partial):
1265 if maybewdir:
1266 # single 'ff...' match in radix tree, ambiguous with wdir
1267 raise error.RevlogError
1268 return partial
1269 if maybewdir:
1270 # no 'ff...' match in radix tree, wdir identified
1271 raise error.WdirUnsupported
1272 return None
1273 except error.RevlogError:
1274 # parsers.c radix tree lookup gave multiple matches
1275 # fast path: for unfiltered changelog, radix tree is accurate
1276 if not getattr(self, 'filteredrevs', None):
1277 raise error.AmbiguousPrefixLookupError(
1278 id, self.indexfile, _('ambiguous identifier'))
1279 # fall through to slow path that filters hidden revisions
1280 except (AttributeError, ValueError):
1281 # we are pure python, or key was too short to search radix tree
1282 pass
1283
1284 if id in self._pcache:
1285 return self._pcache[id]
1286
1287 if len(id) <= 40:
1288 try:
1289 # hex(node)[:...]
1290 l = len(id) // 2 # grab an even number of digits
1291 prefix = bin(id[:l * 2])
1292 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1293 nl = [n for n in nl if hex(n).startswith(id) and
1294 self.hasnode(n)]
1295 if nullhex.startswith(id):
1296 nl.append(nullid)
1297 if len(nl) > 0:
1298 if len(nl) == 1 and not maybewdir:
1299 self._pcache[id] = nl[0]
1300 return nl[0]
1301 raise error.AmbiguousPrefixLookupError(
1302 id, self.indexfile, _('ambiguous identifier'))
1303 if maybewdir:
1304 raise error.WdirUnsupported
1305 return None
1306 except TypeError:
1307 pass
1308
1309 def lookup(self, id):
1310 """locate a node based on:
1311 - revision number or str(revision number)
1312 - nodeid or subset of hex nodeid
1313 """
1314 n = self._match(id)
1315 if n is not None:
1316 return n
1317 n = self._partialmatch(id)
1318 if n:
1319 return n
1320
1321 raise error.LookupError(id, self.indexfile, _('no match found'))
1322
1323 def shortest(self, node, minlength=1):
1324 """Find the shortest unambiguous prefix that matches node."""
1325 def isvalid(prefix):
1326 try:
1327 node = self._partialmatch(prefix)
1328 except error.RevlogError:
1329 return False
1330 except error.WdirUnsupported:
1331 # single 'ff...' match
1332 return True
1333 if node is None:
1334 raise error.LookupError(node, self.indexfile, _('no node'))
1335 return True
1336
1337 def maybewdir(prefix):
1338 return all(c == 'f' for c in prefix)
1339
1340 hexnode = hex(node)
1341
1342 def disambiguate(hexnode, minlength):
1343 """Disambiguate against wdirid."""
1344 for length in range(minlength, 41):
1345 prefix = hexnode[:length]
1346 if not maybewdir(prefix):
1347 return prefix
1348
1349 if not getattr(self, 'filteredrevs', None):
1350 try:
1351 length = max(self.index.shortest(node), minlength)
1352 return disambiguate(hexnode, length)
1353 except error.RevlogError:
1354 if node != wdirid:
1355 raise error.LookupError(node, self.indexfile, _('no node'))
1356 except AttributeError:
1357 # Fall through to pure code
1358 pass
1359
1360 if node == wdirid:
1361 for length in range(minlength, 41):
1362 prefix = hexnode[:length]
1363 if isvalid(prefix):
1364 return prefix
1365
1366 for length in range(minlength, 41):
1367 prefix = hexnode[:length]
1368 if isvalid(prefix):
1369 return disambiguate(hexnode, length)
1370
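# Shortest unambiguous prefix in miniature over a plain list of hex
# hashes (hypothetical helper; the real method prefers the C radix
# tree's answer and additionally avoids all-'f' prefixes, which are
# ambiguous with the working-directory pseudo-id):
def toy_shortest(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if [h for h in hexnodes if h.startswith(prefix)] == [hexnode]:
            return prefix
    return hexnode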
1371 def cmp(self, node, text):
1372 """compare text with a given file revision
1373
1374 returns True if text is different from what is stored.
1375 """
1376 p1, p2 = self.parents(node)
1377 return hash(text, p1, p2) != node
1378
1379 def _cachesegment(self, offset, data):
1380 """Add a segment to the revlog cache.
1381
1382 Accepts an absolute offset and the data that is at that location.
1383 """
1384 o, d = self._chunkcache
1385 # try to add to existing cache
1386 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1387 self._chunkcache = o, d + data
1388 else:
1389 self._chunkcache = offset, data
1390
1391 def _readsegment(self, offset, length, df=None):
1392 """Load a segment of raw data from the revlog.
1393
1394 Accepts an absolute offset, length to read, and an optional existing
1395 file handle to read from.
1396
1397 If an existing file handle is passed, it will be repositioned and the
1398 original seek position will NOT be restored.
1399
1400 Returns a str or buffer of raw byte data.
1401 """
1402 # Cache data both forward and backward around the requested
1403 # data, in a fixed size window. This helps speed up operations
1404 # involving reading the revlog backwards.
1405 cachesize = self._chunkcachesize
1406 realoffset = offset & ~(cachesize - 1)
1407 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1408 - realoffset)
1409 with self._datareadfp(df) as df:
1410 df.seek(realoffset)
1411 d = df.read(reallength)
1412 self._cachesegment(realoffset, d)
1413 if offset != realoffset or reallength != length:
1414 return util.buffer(d, offset - realoffset, length)
1415 return d
1416
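# The two masking lines above round the requested byte range outward to
# cache-size-aligned boundaries; _chunkcachesize must be a power of two
# for ``& ~(cachesize - 1)`` to behave as a floor. A worked check with
# a hypothetical 64 KiB cache size:
def aligned_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)                  # floor
    realend = (offset + length + cachesize) & ~(cachesize - 1)
    return realoffset, realend - realoffset

# aligned_window(70000, 100) == (65536, 65536): the 100 requested bytes
# come back inside one aligned 64 KiB window that is cached whole.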
1417 def _getsegment(self, offset, length, df=None):
1418 """Obtain a segment of raw data from the revlog.
1419
1420 Accepts an absolute offset, length of bytes to obtain, and an
1421 optional file handle to the already-opened revlog. If the file
1422 handle is used, its original seek position will not be preserved.
1423
1424 Requests for data may be returned from a cache.
1425
1426 Returns a str or a buffer instance of raw byte data.
1427 """
1428 o, d = self._chunkcache
1429 l = len(d)
1430
1431 # is it in the cache?
1432 cachestart = offset - o
1433 cacheend = cachestart + length
1434 if cachestart >= 0 and cacheend <= l:
1435 if cachestart == 0 and cacheend == l:
1436 return d # avoid a copy
1437 return util.buffer(d, cachestart, cacheend - cachestart)
1438
1439 return self._readsegment(offset, length, df=df)
1440
1441 def _getsegmentforrevs(self, startrev, endrev, df=None):
1442 """Obtain a segment of raw data corresponding to a range of revisions.
1443
1444 Accepts the start and end revisions and an optional already-open
1445 file handle to be used for reading. If the file handle is read, its
1446 seek position will not be preserved.
1447
1448 Requests for data may be satisfied by a cache.
1449
1450 Returns a 2-tuple of (offset, data) for the requested range of
1451 revisions. Offset is the integer offset from the beginning of the
1452 revlog and data is a str or buffer of the raw byte data.
1453
1454 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1455 to determine where each revision's data begins and ends.
1456 """
1457 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1458 # (functions are expensive).
1459 index = self.index
1460 istart = index[startrev]
1461 start = int(istart[0] >> 16)
1462 if startrev == endrev:
1463 end = start + istart[1]
1464 else:
1465 iend = index[endrev]
1466 end = int(iend[0] >> 16) + iend[1]
1467
1468 if self._inline:
1469 start += (startrev + 1) * self._io.size
1470 end += (endrev + 1) * self._io.size
1471 length = end - start
1472
1473 return start, self._getsegment(start, length, df=df)
1474
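# The inline adjustment above exists because an inline revlog physically
# interleaves index entries with data: the bytes for rev r start after
# r + 1 fixed-size index records. Sketch of that offset translation
# (hypothetical helper; 64 is the revlogv1 index entry size):
def inline_data_offset(logicalstart, rev, iosize=64):
    return logicalstart + (rev + 1) * iosize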
1475 def _chunk(self, rev, df=None):
1476 """Obtain a single decompressed chunk for a revision.
1477
1478 Accepts an integer revision and an optional already-open file handle
1479 to be used for reading. If used, the seek position of the file will not
1480 be preserved.
1481
1482 Returns a str holding uncompressed data for the requested revision.
1483 """
1484 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1485
1486 def _chunks(self, revs, df=None, targetsize=None):
1487 """Obtain decompressed chunks for the specified revisions.
1488
1489 Accepts an iterable of numeric revisions that are assumed to be in
1490 ascending order. Also accepts an optional already-open file handle
1491 to be used for reading. If used, the seek position of the file will
1492 not be preserved.
1493
1494 This function is similar to calling ``self._chunk()`` multiple times,
1495 but is faster.
1496
1497 Returns a list with decompressed data for each requested revision.
1498 """
1499 if not revs:
1500 return []
1501 start = self.start
1502 length = self.length
1503 inline = self._inline
1504 iosize = self._io.size
1505 buffer = util.buffer
1506
1507 l = []
1508 ladd = l.append
1509
1510 if not self._withsparseread:
1511 slicedchunks = (revs,)
1512 else:
1513 slicedchunks = deltautil.slicechunk(self, revs,
1514 targetsize=targetsize)
1515
1516 for revschunk in slicedchunks:
1517 firstrev = revschunk[0]
1518 # Skip trailing revisions with empty diff
1519 for lastrev in revschunk[::-1]:
1520 if length(lastrev) != 0:
1521 break
1522
1523 try:
1524 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1525 except OverflowError:
1526 # issue4215 - we can't cache a run of chunks greater than
1527 # 2G on Windows
1528 return [self._chunk(rev, df=df) for rev in revschunk]
1529
1530 decomp = self.decompress
1531 for rev in revschunk:
1532 chunkstart = start(rev)
1533 if inline:
1534 chunkstart += (rev + 1) * iosize
1535 chunklength = length(rev)
1536 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1537
1538 return l
1539
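# The batched read in _chunks() amounts to: fetch one contiguous
# segment, then carve and decompress a window per revision. Standalone
# sketch with zlib standing in for self.decompress (hypothetical layout:
# positions is a list of (absolute_start, length) pairs and every chunk
# is zlib-compressed; real revlog chunks carry a one-byte compression
# header instead):
import zlib

def toy_chunks(segment, segmentoffset, positions):
    out = []
    for chunkstart, chunklength in positions:
        rel = chunkstart - segmentoffset
        out.append(zlib.decompress(segment[rel:rel + chunklength]))
    return out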
1540 def _chunkclear(self):
1541 """Clear the raw chunk cache."""
1542 self._chunkcache = (0, '')
1543
1544 def deltaparent(self, rev):
1545 """return deltaparent of the given revision"""
1546 base = self.index[rev][3]
1547 if base == rev:
1548 return nullrev
1549 elif self._generaldelta:
1550 return base
1551 else:
1552 return rev - 1
1553
1554 def issnapshot(self, rev):
1555 """tells whether rev is a snapshot
1556 """
1557 if rev == nullrev:
1558 return True
1559 deltap = self.deltaparent(rev)
1560 if deltap == nullrev:
1561 return True
1562 p1, p2 = self.parentrevs(rev)
1563 if deltap in (p1, p2):
1564 return False
1565 return self.issnapshot(deltap)
1566
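# The recursion above encodes three cases: a delta against nullrev is a
# full snapshot; a delta against one of the revision's own parents is
# an ordinary delta; a delta against anything else makes rev an
# intermediate snapshot exactly when its delta base is itself a
# snapshot. Table-driven sketch (hypothetical helper; -1 for nullrev):
def toy_issnapshot(deltaparent, parents, rev):
    if rev == -1 or deltaparent[rev] == -1:
        return True
    if deltaparent[rev] in parents[rev]:
        return False
    return toy_issnapshot(deltaparent, parents, deltaparent[rev])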
1567 def snapshotdepth(self, rev):
1568 """number of snapshots in the chain before this one"""
1569 if not self.issnapshot(rev):
1570 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1571 return len(self._deltachain(rev)[0]) - 1
1572
1573 def revdiff(self, rev1, rev2):
1574 """return or calculate a delta between two revisions
1575
1576 The delta calculated is in binary form and is intended to be written to
1577 revlog data directly. So this function needs raw revision data.
1578 """
1579 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1580 return bytes(self._chunk(rev2))
1581
1582 return mdiff.textdiff(self.revision(rev1, raw=True),
1583 self.revision(rev2, raw=True))
1584
1585 def revision(self, nodeorrev, _df=None, raw=False):
1585 def revision(self, nodeorrev, _df=None, raw=False):
1586 """return an uncompressed revision of a given node or revision
1586 """return an uncompressed revision of a given node or revision
1587 number.
1587 number.
1588
1588
1589 _df - an existing file handle to read from. (internal-only)
1589 _df - an existing file handle to read from. (internal-only)
1590 raw - an optional argument specifying if the revision data is to be
1590 raw - an optional argument specifying if the revision data is to be
1591 treated as raw data when applying flag transforms. 'raw' should be set
1591 treated as raw data when applying flag transforms. 'raw' should be set
1592 to True when generating changegroups or in debug commands.
1592 to True when generating changegroups or in debug commands.
1593 """
1593 """
1594 if isinstance(nodeorrev, int):
1594 if isinstance(nodeorrev, int):
1595 rev = nodeorrev
1595 rev = nodeorrev
1596 node = self.node(rev)
1596 node = self.node(rev)
1597 else:
1597 else:
1598 node = nodeorrev
1598 node = nodeorrev
1599 rev = None
1599 rev = None
1600
1600
1601 cachedrev = None
1601 cachedrev = None
1602 flags = None
1602 flags = None
1603 rawtext = None
1603 rawtext = None
1604 if node == nullid:
1604 if node == nullid:
1605 return ""
1605 return ""
1606 if self._cache:
1606 if self._cache:
1607 if self._cache[0] == node:
1607 if self._cache[0] == node:
1608 # _cache only stores rawtext
1608 # _cache only stores rawtext
1609 if raw:
1609 if raw:
1610 return self._cache[2]
1610 return self._cache[2]
1611 # duplicated, but good for perf
1611 # duplicated, but good for perf
1612 if rev is None:
1612 if rev is None:
1613 rev = self.rev(node)
1613 rev = self.rev(node)
1614 if flags is None:
1614 if flags is None:
1615 flags = self.flags(rev)
1615 flags = self.flags(rev)
1616 # no extra flags set, no flag processor runs, text = rawtext
1616 # no extra flags set, no flag processor runs, text = rawtext
1617 if flags == REVIDX_DEFAULT_FLAGS:
1617 if flags == REVIDX_DEFAULT_FLAGS:
1618 return self._cache[2]
1618 return self._cache[2]
1619 # rawtext is reusable. need to run flag processor
1619 # rawtext is reusable. need to run flag processor
1620 rawtext = self._cache[2]
1620 rawtext = self._cache[2]
1621
1621
1622 cachedrev = self._cache[1]
1622 cachedrev = self._cache[1]
1623
1623
1624 # look up what we need to read
1624 # look up what we need to read
1625 if rawtext is None:
1625 if rawtext is None:
1626 if rev is None:
1626 if rev is None:
1627 rev = self.rev(node)
1627 rev = self.rev(node)
1628
1628
1629 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1629 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1630 if stopped:
1630 if stopped:
1631 rawtext = self._cache[2]
1631 rawtext = self._cache[2]
1632
1632
1633 # drop cache to save memory
1633 # drop cache to save memory
1634 self._cache = None
1634 self._cache = None
1635
1635
1636 targetsize = None
1636 targetsize = None
1637 rawsize = self.index[rev][2]
1637 rawsize = self.index[rev][2]
1638 if 0 <= rawsize:
1638 if 0 <= rawsize:
1639 targetsize = 4 * rawsize
1639 targetsize = 4 * rawsize
1640
1640
1641 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1641 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1642 if rawtext is None:
1642 if rawtext is None:
1643 rawtext = bytes(bins[0])
1643 rawtext = bytes(bins[0])
1644 bins = bins[1:]
1644 bins = bins[1:]
1645
1645
1646 rawtext = mdiff.patches(rawtext, bins)
1646 rawtext = mdiff.patches(rawtext, bins)
1647 self._cache = (node, rev, rawtext)
1647 self._cache = (node, rev, rawtext)
1648
1648
1649 if flags is None:
1649 if flags is None:
1650 if rev is None:
1650 if rev is None:
1651 rev = self.rev(node)
1651 rev = self.rev(node)
1652 flags = self.flags(rev)
1652 flags = self.flags(rev)
1653
1653
1654 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1654 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1655 if validatehash:
1655 if validatehash:
1656 self.checkhash(text, node, rev=rev)
1656 self.checkhash(text, node, rev=rev)
1657
1657
1658 return text
1658 return text
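
    # Usage sketch (illustrative; `rl` is a revlog instance). The method
    # accepts either a revision number or a node, and ``raw=True`` skips the
    # read-side flag transforms:
    #
    #     text = rl.revision(0)                # by revision number
    #     same = rl.revision(rl.node(0))       # by node
    #     stored = rl.revision(0, raw=True)    # raw text, as stored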

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)
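
    # For reference (a sketch, not the module's implementation): the default
    # node hash is the SHA-1 of the two parent nodes, concatenated in sorted
    # order, followed by the text:
    #
    #     import hashlib
    #     def nodehash(text, p1, p2):
    #         s = hashlib.sha1(min(p1, p2) + max(p1, p2))
    #         s.update(text)
    #         return s.digest()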

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash
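
    # Sketch of a flag processor registration (illustrative; the transform
    # names are hypothetical, but the (read, write, raw) triple is the shape
    # consumed above). Read/write transforms return ``(text, validatehash)``;
    # the raw transform returns only the validatehash boolean:
    #
    #     def _readtransform(rl, text):
    #         return decodefn(text), True
    #     def _writetransform(rl, text):
    #         return encodefn(text), False
    #     def _rawtransform(rl, rawtext):
    #         return False
    #
    #     addflagprocessor(REVIDX_EXTSTORED,
    #                      (_readtransform, _writetransform, _rawtransform))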

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and _censoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2); however, subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
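
    # Usage sketch (illustrative; `repo`, `rl`, and the argument names are
    # hypothetical): adding one revision inside a transaction.
    #
    #     with repo.transaction(b'example') as tr:
    #         node = rl.addrevision(b'file contents\n', tr, linkrev,
    #                               p1node, p2node)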

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data
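
    # Round-trip sketch (illustrative): ``compress()`` returns a
    # ``(header, data)`` pair whose concatenation ``decompress()`` accepts.
    # An empty header means the compressed data carries its own header byte;
    # 'u' marks data stored literally:
    #
    #     header, blob = rl.compress(b'some revision text')
    #     assert rl.decompress(header + blob) == b'some revision text'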

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
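
    # Index entry layout sketch (for orientation; field order as built above):
    #
    #     (offset_type(offset, flags),  # offset << 16 | flags
    #      deltainfo.deltalen,          # length of the stored chunk
    #      textlen,                     # uncompressed rawtext length
    #      deltainfo.base,              # rev of the delta base
    #      link,                        # linkrev into the changelog
    #      p1r, p2r,                    # parent revision numbers
    #      node)                        # binary node id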

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)
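
    # Layout note (a sketch, assuming the 64-byte v1 index entry size): in an
    # inline revlog, index entries and data chunks are interleaved in the .i
    # file, so the chunk for revision r starts at
    #
    #     self.start(r) + (r + 1) * self._io.size
    #
    # whereas a non-inline revlog keeps chunks in the .d file at self.start(r).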

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes
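
    # Shape of the ``deltas`` iterable (a sketch): each item is a 7-tuple,
    # unpacked above as
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags)
    #
    # where ``delta`` is a binary diff against ``deltabase`` and ``flags``
    # may be 0 to request the default revision flags.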

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        # Fragile heuristic: unless new file meta keys are added alphabetically
        # preceding "censored", all censored revisions are prefixed by
        # "\1\ncensored:". A delta producing such a censored revision must be a
        # full-replacement delta, so we inspect the first and only patch in the
        # delta for this prefix.
        hlen = struct.calcsize(">lll")
        if len(delta) <= hlen:
            return False

        oldlen = self.rawsize(baserev)
        newlen = len(delta) - hlen
        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
            return False

        add = "\1\ncensored:"
        addlen = len(add)
        return newlen >= addlen and delta[hlen:hlen + addlen] == add
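
    # Worked example (a sketch; assumes mdiff.replacediffheader packs a
    # single full-replacement hunk header, i.e. struct.pack('>lll', 0,
    # oldlen, newlen)): a delta replacing a 10-byte base with censored
    # metadata would look like
    #
    #     struct.pack('>lll', 0, 10, newlen) + '\1\ncensored:...'
    #
    # and only such single-hunk full replacements pass the check above.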

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs
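
    # Worked example (illustrative): in a linear revlog whose revisions
    # 0..4 have linkrevs [0, 1, 2, 3, 4], getstrippoint(3) returns
    # (3, set()): revision 3 is the first to go, and no surviving revision
    # has a linkrev that the strip would break.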

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

    def checksize(self):
        # Returns a (dd, di) tuple: the difference, in bytes, between the
        # actual and expected sizes of the data file and index file
        # respectively ((0, 0) when both are consistent with the index).
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisiondeltas(self, requests):
        frev = self.rev

        prevrev = None
        for request in requests:
            node = request.node
            rev = frev(node)

            if prevrev is None:
                prevrev = self.index[rev][5]

            # Requesting a full revision.
            if request.basenode == nullid:
                baserev = nullrev
            # Requesting an explicit revision.
            elif request.basenode is not None:
                baserev = frev(request.basenode)
            # Allowing us to choose.
            else:
                p1rev, p2rev = self.parentrevs(rev)
                deltaparentrev = self.deltaparent(rev)

                # Avoid sending full revisions when delta parent is null. Pick
                # prev in that case. It's tempting to pick p1 in this case, as
                # p1 will be smaller in the common case. However, computing a
                # delta against p1 may require resolving the raw text of p1,
                # which could be expensive. The revlog caches should have prev
                # cached, meaning less CPU for delta generation. There is
                # likely room to add a flag and/or config option to control this
                # behavior.
                if deltaparentrev == nullrev and self._storedeltachains:
                    baserev = prevrev

                # Revlog is configured to use full snapshot for a reason.
                # Stick to full snapshot.
                elif deltaparentrev == nullrev:
                    baserev = nullrev

                # Pick previous when we can't be sure the base is available
                # on consumer.
                elif deltaparentrev not in (p1rev, p2rev, prevrev):
                    baserev = prevrev
                else:
                    baserev = deltaparentrev

                if baserev != nullrev and not self.candelta(baserev, rev):
                    baserev = nullrev

            revision = None
            delta = None
            baserevisionsize = None

            if self.iscensored(baserev) or self.iscensored(rev):
                try:
                    revision = self.revision(node, raw=True)
                except error.CensoredNodeError as e:
                    revision = e.tombstone

                if baserev != nullrev:
                    baserevisionsize = self.rawsize(baserev)

            elif baserev == nullrev:
                revision = self.revision(node, raw=True)
            else:
                delta = self.revdiff(baserev, rev)

            extraflags = REVIDX_ELLIPSIS if request.ellipsis else 0

            yield revlogrevisiondelta(
                node=node,
                p1node=request.p1node,
                p2node=request.p2node,
                linknode=request.linknode,
                basenode=self.node(baserev),
                flags=self.flags(rev) | extraflags,
                baserevisionsize=baserevisionsize,
                revision=revision,
                delta=delta)

            prevrev = rev
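
    # Request/response shape sketch (for orientation): each ``request`` is
    # expected to expose ``node``, ``basenode`` (nullid for a full revision,
    # None to let the revlog choose), ``p1node``, ``p2node``, ``linknode``
    # and ``ellipsis``; each yielded ``revlogrevisiondelta`` carries either
    # ``revision`` (a full raw text) or ``delta`` (a diff against
    # ``basenode``), never both.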

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the destination
            revlog would not select the same revisions for the delta. This is
            the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the same
            revisions for the delta. This mode strikes a balance between speed
            and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of execution.
            This mode can be used to recompute deltas (e.g. if the diff/delta
            algorithm changes).
        DELTAREUSEFULLADD
            Revisions will be re-added to the destination through the regular
            ``addrevision()`` path instead of low-level index writes, letting
            the destination recompute deltas and flags as it sees fit.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``deltabothparents`` argument
        controls whether to compute deltas against both parents for merges.
        When ``deltabothparents`` is None, the destination revlog's existing
        setting is preserved.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = deltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
2495
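    # A minimal usage sketch for clone() (the repository and revlog names
    # here are illustrative assumptions, not part of this module):
    #
    #   with repo.transaction(b'clonerevlog') as tr:
    #       srcrevlog.clone(tr, destrevlog,
    #                       deltareuse=srcrevlog.DELTAREUSESAMEREVS)
    #
    # Both revlogs must be unfiltered, and deltareuse selects how much of
    # the source's delta structure is carried over to the destination.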
    def censorrevision(self, node, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        rev = self.rev(node)
        tombstone = packmeta({b'censored': tombstone}, b'')
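        # packmeta wraps the tombstone in a filelog metadata block keyed by
        # b'censored'; consumers that later materialize this revision detect
        # that key and raise or handle CensoredNodeError instead of treating
        # the replacement bytes as real file data.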

        if len(tombstone) > self.rawsize(rev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Using two files instead of one makes it easy to rewrite entry-by-entry
        idxread = self.opener(self.indexfile, 'r')
        idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
        if self.version & FLAG_INLINE_DATA:
            dataread, datawrite = idxread, idxwrite
        else:
            dataread = self.opener(self.datafile, 'r')
            datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
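        # atomictemp writes go to a temporary file that replaces the real one
        # only when close() succeeds, so an interrupted censor operation
        # cannot leave a partially rewritten revlog on disk.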

        # Copy all revlog data up to the entry to be censored.
        offset = self.start(rev)

        for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
            flags = gettype(offlags)
            if newdata:
                comp, uncomp, nflags = newdata
                flags |= nflags
            offlags = offset_type(newoffs, flags)
            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
            idxwrite.write(self._io.packentry(e, None, self.version, r))
            idxread.seek(self._io.size, 1)

        def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
            """Write the given fulltext with the given data offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = self.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            dataread.seek(self.length(r), 1)
            return newcomp
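        # Each helper also advances its read cursor past the entry it just
        # replaced (one packed index entry for rewriteindex, the old chunk
        # length for rewrite), keeping the verbatim copy loop below aligned
        # with the old file contents.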

        # Rewrite the censored entry with (padded) tombstone data. Padding
        # keeps the uncompressed length identical to the original revision,
        # so sizes recorded in the index and deltas based on this revision
        # stay consistent.
        pad = ' ' * (self.rawsize(rev) - len(tombstone))
        offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

        # Rewrite all following filelog revisions, fixing up offsets and deltas.
        for srev in pycompat.xrange(rev + 1, len(self)):
            if rev in self.parentrevs(srev):
                # Immediate children of the censored node must be re-added as
                # fulltext.
                try:
                    revdata = self.revision(srev)
                except error.CensoredNodeError as e:
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = self.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen

        idxread.close()
        idxwrite.close()
        if dataread is not idxread:
            dataread.close()
        datawrite.close()
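    # A minimal usage sketch for censorrevision() (the repository and path
    # names are illustrative assumptions, not part of this module):
    #
    #   flog = repo.file(b'path/to/offending/file')
    #   flog.censorrevision(fnode, tombstone=b'removed: see legal notice')
    #
    # where fnode is the filelog node whose content should be erased. The
    # padded tombstone replaces the file data in place, so every existing
    # changeset and manifest hash remains valid.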