revlog: move parsemeta() and packmeta() from filelog (API)...
Gregory Szorc
r37460:0596d274 default
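
Before the per-file diffs: this change moves parsemeta() and packmeta() to mercurial.revlog and updates every caller that previously reached them through mercurial.filelog. The sketch below illustrates the round trip the two helpers implement. It is illustrative only, not part of this commit; it assumes a Mercurial source tree that includes this revision, and Python 2 byte strings, as in the code base below.

    # Illustrative sketch, not part of this commit. Assumes revlog.parsemeta()
    # and revlog.packmeta() as introduced by this change.
    from mercurial import revlog

    # Pack copy metadata ahead of a file's text, as filelog.add() does.
    meta = {'copy': 'a.txt', 'copyrev': '0' * 40}
    text = revlog.packmeta(meta, 'file contents\n')
    assert text.startswith('\1\n')       # the metadata block is \1\n-delimited

    # Parse it back; offset is where the real fulltext begins.
    parsed, offset = revlog.parsemeta(text)
    assert parsed == meta
    assert text[offset:] == 'file contents\n'
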
diff --git a/hgext/censor.py b/hgext/censor.py
--- a/hgext/censor.py
+++ b/hgext/censor.py
@@ -1,190 +1,189 @@
 # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
 #
 # This extension enables removal of file content at a given revision,
 # rewriting the data/metadata of successive revisions to preserve revision log
 # integrity.
 
 """erase file content at a given revision
 
 The censor command instructs Mercurial to erase all content of a file at a given
 revision *without updating the changeset hash.* This allows existing history to
 remain valid while preventing future clones/pulls from receiving the erased
 data.
 
 Typical uses for censor are due to security or legal requirements, including::
 
  * Passwords, private keys, cryptographic material
  * Licensed data/code/libraries for which the license has expired
  * Personally Identifiable Information or other private data
 
 Censored nodes can interrupt mercurial's typical operation whenever the excised
 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
 simply fail when asked to produce censored data. Others, like ``hg verify`` and
 ``hg update``, must be capable of tolerating censored data to continue to
 function in a meaningful way. Such commands only tolerate censored file
 revisions if they are allowed by the "censor.policy=ignore" config option.
 """
 
 from __future__ import absolute_import
 
 from mercurial.i18n import _
 from mercurial.node import short
 
 from mercurial import (
     error,
-    filelog,
     lock as lockmod,
     registrar,
     revlog,
     scmutil,
     util,
 )
 
 cmdtable = {}
 command = registrar.command(cmdtable)
 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
 # be specifying the version(s) of Mercurial they are tested with, or
 # leave the attribute unspecified.
 testedwith = 'ships-with-hg-core'
 
 @command('censor',
     [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
      ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
     _('-r REV [-t TEXT] [FILE]'))
 def censor(ui, repo, path, rev='', tombstone='', **opts):
     wlock = lock = None
     try:
         wlock = repo.wlock()
         lock = repo.lock()
         return _docensor(ui, repo, path, rev, tombstone, **opts)
     finally:
         lockmod.release(lock, wlock)
 
 def _docensor(ui, repo, path, rev='', tombstone='', **opts):
     if not path:
         raise error.Abort(_('must specify file path to censor'))
     if not rev:
         raise error.Abort(_('must specify revision to censor'))
 
     wctx = repo[None]
 
     m = scmutil.match(wctx, (path,))
     if m.anypats() or len(m.files()) != 1:
         raise error.Abort(_('can only specify an explicit filename'))
     path = m.files()[0]
     flog = repo.file(path)
     if not len(flog):
         raise error.Abort(_('cannot censor file with no history'))
 
     rev = scmutil.revsingle(repo, rev, rev).rev()
     try:
         ctx = repo[rev]
     except KeyError:
         raise error.Abort(_('invalid revision identifier %s') % rev)
 
     try:
         fctx = ctx.filectx(path)
     except error.LookupError:
         raise error.Abort(_('file does not exist at revision %s') % rev)
 
     fnode = fctx.filenode()
     headctxs = [repo[c] for c in repo.heads()]
     heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
     if heads:
         headlist = ', '.join([short(c.node()) for c in heads])
         raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
                           hint=_('clean/delete and commit first'))
 
     wp = wctx.parents()
     if ctx.node() in [p.node() for p in wp]:
         raise error.Abort(_('cannot censor working directory'),
                           hint=_('clean/delete/update first'))
 
     flogv = flog.version & 0xFFFF
     if flogv != revlog.REVLOGV1:
         raise error.Abort(
             _('censor does not support revlog version %d') % (flogv,))
 
-    tombstone = filelog.packmeta({"censored": tombstone}, "")
+    tombstone = revlog.packmeta({"censored": tombstone}, "")
 
     crev = fctx.filerev()
 
     if len(tombstone) > flog.rawsize(crev):
         raise error.Abort(_(
             'censor tombstone must be no longer than censored data'))
 
     # Using two files instead of one makes it easy to rewrite entry-by-entry
     idxread = repo.svfs(flog.indexfile, 'r')
     idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
     if flog.version & revlog.FLAG_INLINE_DATA:
         dataread, datawrite = idxread, idxwrite
     else:
         dataread = repo.svfs(flog.datafile, 'r')
         datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
 
     # Copy all revlog data up to the entry to be censored.
     rio = revlog.revlogio()
     offset = flog.start(crev)
 
     for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
         idxwrite.write(chunk)
     for chunk in util.filechunkiter(dataread, limit=offset):
         datawrite.write(chunk)
 
     def rewriteindex(r, newoffs, newdata=None):
         """Rewrite the index entry with a new data offset and optional new data.
 
         The newdata argument, if given, is a tuple of three positive integers:
         (new compressed, new uncompressed, added flag bits).
         """
         offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
         flags = revlog.gettype(offlags)
         if newdata:
             comp, uncomp, nflags = newdata
             flags |= nflags
         offlags = revlog.offset_type(newoffs, flags)
         e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
         idxwrite.write(rio.packentry(e, None, flog.version, r))
         idxread.seek(rio.size, 1)
 
     def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
         """Write the given full text to the filelog with the given data offset.
 
         Returns:
             The integer number of data bytes written, for tracking data offsets.
         """
         flag, compdata = flog.compress(data)
         newcomp = len(flag) + len(compdata)
         rewriteindex(r, offs, (newcomp, len(data), nflags))
         datawrite.write(flag)
         datawrite.write(compdata)
         dataread.seek(flog.length(r), 1)
         return newcomp
 
     # Rewrite censored revlog entry with (padded) tombstone data.
     pad = ' ' * (flog.rawsize(crev) - len(tombstone))
     offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
 
     # Rewrite all following filelog revisions fixing up offsets and deltas.
     for srev in xrange(crev + 1, len(flog)):
         if crev in flog.parentrevs(srev):
             # Immediate children of censored node must be re-added as fulltext.
             try:
                 revdata = flog.revision(srev)
             except error.CensoredNodeError as e:
                 revdata = e.tombstone
             dlen = rewrite(srev, offset, revdata)
         else:
             # Copy any other revision data verbatim after fixing up the offset.
             rewriteindex(srev, offset)
             dlen = flog.length(srev)
             for chunk in util.filechunkiter(dataread, limit=dlen):
                 datawrite.write(chunk)
         offset += dlen
 
     idxread.close()
     idxwrite.close()
     if dataread is not idxread:
         dataread.close()
     datawrite.close()

diff --git a/hgext/lfs/wrapper.py b/hgext/lfs/wrapper.py
--- a/hgext/lfs/wrapper.py
+++ b/hgext/lfs/wrapper.py
@@ -1,387 +1,386 @@
 # wrapper.py - methods wrapping core mercurial logic
 #
 # Copyright 2017 Facebook, Inc.
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
 import hashlib
 
 from mercurial.i18n import _
 from mercurial.node import bin, hex, nullid, short
 
 from mercurial import (
     error,
-    filelog,
     revlog,
     util,
 )
 
 from mercurial.utils import (
     stringutil,
 )
 
 from ..largefiles import lfutil
 
 from . import (
     blobstore,
     pointer,
 )
 
 def allsupportedversions(orig, ui):
     versions = orig(ui)
     versions.add('03')
     return versions
 
 def _capabilities(orig, repo, proto):
     '''Wrap server command to announce lfs server capability'''
     caps = orig(repo, proto)
     # XXX: change to 'lfs=serve' when separate git server isn't required?
     caps.append('lfs')
     return caps
 
 def bypasscheckhash(self, text):
     return False
 
 def readfromstore(self, text):
     """Read filelog content from local blobstore transform for flagprocessor.
 
     Default tranform for flagprocessor, returning contents from blobstore.
     Returns a 2-typle (text, validatehash) where validatehash is True as the
     contents of the blobstore should be checked using checkhash.
     """
     p = pointer.deserialize(text)
     oid = p.oid()
     store = self.opener.lfslocalblobstore
     if not store.has(oid):
         p.filename = self.filename
         self.opener.lfsremoteblobstore.readbatch([p], store)
 
     # The caller will validate the content
     text = store.read(oid, verify=False)
 
     # pack hg filelog metadata
     hgmeta = {}
     for k in p.keys():
         if k.startswith('x-hg-'):
             name = k[len('x-hg-'):]
             hgmeta[name] = p[k]
     if hgmeta or text.startswith('\1\n'):
-        text = filelog.packmeta(hgmeta, text)
+        text = revlog.packmeta(hgmeta, text)
 
     return (text, True)
 
 def writetostore(self, text):
     # hg filelog metadata (includes rename, etc)
-    hgmeta, offset = filelog.parsemeta(text)
+    hgmeta, offset = revlog.parsemeta(text)
     if offset and offset > 0:
         # lfs blob does not contain hg filelog metadata
         text = text[offset:]
 
     # git-lfs only supports sha256
     oid = hex(hashlib.sha256(text).digest())
     self.opener.lfslocalblobstore.write(oid, text)
 
     # replace contents with metadata
     longoid = 'sha256:%s' % oid
     metadata = pointer.gitlfspointer(oid=longoid, size='%d' % len(text))
 
     # by default, we expect the content to be binary. however, LFS could also
     # be used for non-binary content. add a special entry for non-binary data.
     # this will be used by filectx.isbinary().
     if not stringutil.binary(text):
         # not hg filelog metadata (affecting commit hash), no "x-hg-" prefix
         metadata['x-is-binary'] = '0'
 
     # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
     if hgmeta is not None:
         for k, v in hgmeta.iteritems():
             metadata['x-hg-%s' % k] = v
 
     rawtext = metadata.serialize()
     return (rawtext, False)
 
 def _islfs(rlog, node=None, rev=None):
     if rev is None:
         if node is None:
             # both None - likely working copy content where node is not ready
             return False
         rev = rlog.rev(node)
     else:
         node = rlog.node(rev)
     if node == nullid:
         return False
     flags = rlog.flags(rev)
     return bool(flags & revlog.REVIDX_EXTSTORED)
 
 def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
                        cachedelta=None, node=None,
                        flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
     textlen = len(text)
     # exclude hg rename meta from file size
-    meta, offset = filelog.parsemeta(text)
+    meta, offset = revlog.parsemeta(text)
     if offset:
         textlen -= offset
 
     lfstrack = self.opener.options['lfstrack']
 
     if lfstrack(self.filename, textlen):
         flags |= revlog.REVIDX_EXTSTORED
 
     return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
                 node=node, flags=flags, **kwds)
 
 def filelogrenamed(orig, self, node):
     if _islfs(self, node):
         rawtext = self.revision(node, raw=True)
         if not rawtext:
             return False
         metadata = pointer.deserialize(rawtext)
         if 'x-hg-copy' in metadata and 'x-hg-copyrev' in metadata:
             return metadata['x-hg-copy'], bin(metadata['x-hg-copyrev'])
         else:
             return False
     return orig(self, node)
 
 def filelogsize(orig, self, rev):
     if _islfs(self, rev=rev):
         # fast path: use lfs metadata to answer size
         rawtext = self.revision(rev, raw=True)
         metadata = pointer.deserialize(rawtext)
         return int(metadata['size'])
     return orig(self, rev)
 
 def filectxcmp(orig, self, fctx):
     """returns True if text is different than fctx"""
     # some fctx (ex. hg-git) is not based on basefilectx and do not have islfs
     if self.islfs() and getattr(fctx, 'islfs', lambda: False)():
         # fast path: check LFS oid
         p1 = pointer.deserialize(self.rawdata())
         p2 = pointer.deserialize(fctx.rawdata())
         return p1.oid() != p2.oid()
     return orig(self, fctx)
 
 def filectxisbinary(orig, self):
     if self.islfs():
         # fast path: use lfs metadata to answer isbinary
         metadata = pointer.deserialize(self.rawdata())
         # if lfs metadata says nothing, assume it's binary by default
         return bool(int(metadata.get('x-is-binary', 1)))
     return orig(self)
 
 def filectxislfs(self):
     return _islfs(self.filelog(), self.filenode())
 
 def _updatecatformatter(orig, fm, ctx, matcher, path, decode):
     orig(fm, ctx, matcher, path, decode)
     fm.data(rawdata=ctx[path].rawdata())
 
 def convertsink(orig, sink):
     sink = orig(sink)
     if sink.repotype == 'hg':
         class lfssink(sink.__class__):
             def putcommit(self, files, copies, parents, commit, source, revmap,
                           full, cleanp2):
                 pc = super(lfssink, self).putcommit
                 node = pc(files, copies, parents, commit, source, revmap, full,
                           cleanp2)
 
                 if 'lfs' not in self.repo.requirements:
                     ctx = self.repo[node]
 
                     # The file list may contain removed files, so check for
                     # membership before assuming it is in the context.
                     if any(f in ctx and ctx[f].islfs() for f, n in files):
                         self.repo.requirements.add('lfs')
                         self.repo._writerequirements()
 
                         # Permanently enable lfs locally
                         self.repo.vfs.append(
                             'hgrc', util.tonativeeol('\n[extensions]\nlfs=\n'))
 
                 return node
 
         sink.__class__ = lfssink
 
     return sink
 
 def vfsinit(orig, self, othervfs):
     orig(self, othervfs)
     # copy lfs related options
     for k, v in othervfs.options.items():
         if k.startswith('lfs'):
             self.options[k] = v
     # also copy lfs blobstores. note: this can run before reposetup, so lfs
     # blobstore attributes are not always ready at this time.
     for name in ['lfslocalblobstore', 'lfsremoteblobstore']:
         if util.safehasattr(othervfs, name):
             setattr(self, name, getattr(othervfs, name))
 
 def hgclone(orig, ui, opts, *args, **kwargs):
     result = orig(ui, opts, *args, **kwargs)
 
     if result is not None:
         sourcerepo, destrepo = result
         repo = destrepo.local()
 
         # When cloning to a remote repo (like through SSH), no repo is available
         # from the peer. Therefore the hgrc can't be updated.
         if not repo:
             return result
 
         # If lfs is required for this repo, permanently enable it locally
         if 'lfs' in repo.requirements:
             repo.vfs.append('hgrc',
                             util.tonativeeol('\n[extensions]\nlfs=\n'))
 
     return result
 
 def hgpostshare(orig, sourcerepo, destrepo, bookmarks=True, defaultpath=None):
     orig(sourcerepo, destrepo, bookmarks, defaultpath)
 
     # If lfs is required for this repo, permanently enable it locally
     if 'lfs' in destrepo.requirements:
         destrepo.vfs.append('hgrc', util.tonativeeol('\n[extensions]\nlfs=\n'))
 
 def _prefetchfiles(repo, ctx, files):
     """Ensure that required LFS blobs are present, fetching them as a group if
     needed."""
     pointers = []
     localstore = repo.svfs.lfslocalblobstore
 
     for f in files:
         p = pointerfromctx(ctx, f)
         if p and not localstore.has(p.oid()):
             p.filename = f
             pointers.append(p)
 
     if pointers:
         repo.svfs.lfsremoteblobstore.readbatch(pointers, localstore)
 
 def _canskipupload(repo):
     # if remotestore is a null store, upload is a no-op and can be skipped
     return isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
 
 def candownload(repo):
     # if remotestore is a null store, downloads will lead to nothing
     return not isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
 
 def uploadblobsfromrevs(repo, revs):
     '''upload lfs blobs introduced by revs
 
     Note: also used by other extensions e. g. infinitepush. avoid renaming.
     '''
     if _canskipupload(repo):
         return
     pointers = extractpointers(repo, revs)
     uploadblobs(repo, pointers)
 
 def prepush(pushop):
     """Prepush hook.
 
     Read through the revisions to push, looking for filelog entries that can be
     deserialized into metadata so that we can block the push on their upload to
     the remote blobstore.
     """
     return uploadblobsfromrevs(pushop.repo, pushop.outgoing.missing)
 
 def push(orig, repo, remote, *args, **kwargs):
     """bail on push if the extension isn't enabled on remote when needed"""
     if 'lfs' in repo.requirements:
         # If the remote peer is for a local repo, the requirement tests in the
         # base class method enforce lfs support. Otherwise, some revisions in
         # this repo use lfs, and the remote repo needs the extension loaded.
         if not remote.local() and not remote.capable('lfs'):
             # This is a copy of the message in exchange.push() when requirements
             # are missing between local repos.
             m = _("required features are not supported in the destination: %s")
             raise error.Abort(m % 'lfs',
                               hint=_('enable the lfs extension on the server'))
     return orig(repo, remote, *args, **kwargs)
 
 def writenewbundle(orig, ui, repo, source, filename, bundletype, outgoing,
                    *args, **kwargs):
     """upload LFS blobs added by outgoing revisions on 'hg bundle'"""
     uploadblobsfromrevs(repo, outgoing.missing)
     return orig(ui, repo, source, filename, bundletype, outgoing, *args,
                 **kwargs)
 
 def extractpointers(repo, revs):
     """return a list of lfs pointers added by given revs"""
     repo.ui.debug('lfs: computing set of blobs to upload\n')
     pointers = {}
     for r in revs:
         ctx = repo[r]
         for p in pointersfromctx(ctx).values():
             pointers[p.oid()] = p
     return sorted(pointers.values())
 
 def pointerfromctx(ctx, f, removed=False):
     """return a pointer for the named file from the given changectx, or None if
     the file isn't LFS.
 
     Optionally, the pointer for a file deleted from the context can be returned.
     Since no such pointer is actually stored, and to distinguish from a non LFS
     file, this pointer is represented by an empty dict.
     """
     _ctx = ctx
     if f not in ctx:
         if not removed:
             return None
         if f in ctx.p1():
             _ctx = ctx.p1()
         elif f in ctx.p2():
             _ctx = ctx.p2()
         else:
             return None
     fctx = _ctx[f]
     if not _islfs(fctx.filelog(), fctx.filenode()):
         return None
     try:
         p = pointer.deserialize(fctx.rawdata())
         if ctx == _ctx:
             return p
         return {}
     except pointer.InvalidPointer as ex:
         raise error.Abort(_('lfs: corrupted pointer (%s@%s): %s\n')
                           % (f, short(_ctx.node()), ex))
 
 def pointersfromctx(ctx, removed=False):
     """return a dict {path: pointer} for given single changectx.
 
     If ``removed`` == True and the LFS file was removed from ``ctx``, the value
     stored for the path is an empty dict.
     """
     result = {}
     for f in ctx.files():
         p = pointerfromctx(ctx, f, removed=removed)
         if p is not None:
             result[f] = p
     return result
 
 def uploadblobs(repo, pointers):
     """upload given pointers from local blobstore"""
     if not pointers:
         return
 
     remoteblob = repo.svfs.lfsremoteblobstore
     remoteblob.writebatch(pointers, repo.svfs.lfslocalblobstore)
 
 def upgradefinishdatamigration(orig, ui, srcrepo, dstrepo, requirements):
     orig(ui, srcrepo, dstrepo, requirements)
 
     srclfsvfs = srcrepo.svfs.lfslocalblobstore.vfs
     dstlfsvfs = dstrepo.svfs.lfslocalblobstore.vfs
 
     for dirpath, dirs, files in srclfsvfs.walk():
         for oid in files:
             ui.write(_('copying lfs blob %s\n') % oid)
             lfutil.link(srclfsvfs.join(oid), dstlfsvfs.join(oid))
 
 def upgraderequirements(orig, repo):
     reqs = orig(repo)
     if 'lfs' in repo.requirements:
         reqs.add('lfs')
     return reqs

diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -1,144 +1,124 @@
 # filelog.py - file history class for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
-import re
 import struct
 
 from .thirdparty.zope import (
     interface as zi,
 )
 from . import (
     error,
     mdiff,
     repository,
     revlog,
 )
 
-_mdre = re.compile('\1\n')
-def parsemeta(text):
-    """return (metadatadict, metadatasize)"""
-    # text can be buffer, so we can't use .startswith or .index
-    if text[:2] != '\1\n':
-        return None, None
-    s = _mdre.search(text, 2).start()
-    mtext = text[2:s]
-    meta = {}
-    for l in mtext.splitlines():
-        k, v = l.split(": ", 1)
-        meta[k] = v
-    return meta, (s + 2)
-
-def packmeta(meta, text):
-    keys = sorted(meta)
-    metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
-    return "\1\n%s\1\n%s" % (metatext, text)
-
 def _censoredtext(text):
-    m, offs = parsemeta(text)
+    m, offs = revlog.parsemeta(text)
     return m and "censored" in m
 
 @zi.implementer(repository.ifilestorage)
 class filelog(revlog.revlog):
     def __init__(self, opener, path):
         super(filelog, self).__init__(opener,
                                       "/".join(("data", path + ".i")))
         # full name of the user visible file, relative to the repository root
         self.filename = path
 
     def read(self, node):
         t = self.revision(node)
         if not t.startswith('\1\n'):
             return t
         s = t.index('\1\n', 2)
         return t[s + 2:]
 
     def add(self, text, meta, transaction, link, p1=None, p2=None):
         if meta or text.startswith('\1\n'):
-            text = packmeta(meta, text)
+            text = revlog.packmeta(meta, text)
         return self.addrevision(text, transaction, link, p1, p2)
 
     def renamed(self, node):
         if self.parents(node)[0] != revlog.nullid:
             return False
         t = self.revision(node)
-        m = parsemeta(t)[0]
+        m = revlog.parsemeta(t)[0]
         if m and "copy" in m:
             return (m["copy"], revlog.bin(m["copyrev"]))
         return False
 
     def size(self, rev):
         """return the size of a given revision"""
 
         # for revisions with renames, we have to go the slow way
         node = self.node(rev)
         if self.renamed(node):
             return len(self.read(node))
         if self.iscensored(rev):
             return 0
 
         # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
         return super(filelog, self).size(rev)
 
     def cmp(self, node, text):
         """compare text with a given file revision
 
         returns True if text is different than what is stored.
         """
 
         t = text
         if text.startswith('\1\n'):
             t = '\1\n\1\n' + text
 
         samehashes = not super(filelog, self).cmp(node, t)
         if samehashes:
             return False
 
         # censored files compare against the empty file
         if self.iscensored(self.rev(node)):
             return text != ''
 
         # renaming a file produces a different hash, even if the data
         # remains unchanged. Check if it's the case (slow):
         if self.renamed(node):
             t2 = self.read(node)
             return t2 != text
 
         return True
 
     def checkhash(self, text, node, p1=None, p2=None, rev=None):
         try:
             super(filelog, self).checkhash(text, node, p1=p1, p2=p2, rev=rev)
         except error.RevlogError:
             if _censoredtext(text):
                 raise error.CensoredNodeError(self.indexfile, node, text)
             raise
 
     def iscensored(self, rev):
         """Check if a file revision is censored."""
         return self.flags(rev) & revlog.REVIDX_ISCENSORED
 
     def _peek_iscensored(self, baserev, delta, flush):
         """Quickly check if a delta produces a censored revision."""
         # Fragile heuristic: unless new file meta keys are added alphabetically
         # preceding "censored", all censored revisions are prefixed by
         # "\1\ncensored:". A delta producing such a censored revision must be a
         # full-replacement delta, so we inspect the first and only patch in the
         # delta for this prefix.
         hlen = struct.calcsize(">lll")
         if len(delta) <= hlen:
             return False
 
         oldlen = self.rawsize(baserev)
         newlen = len(delta) - hlen
         if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
             return False
 
         add = "\1\ncensored:"
         addlen = len(add)
         return newlen >= addlen and delta[hlen:hlen + addlen] == add
@@ -1,2534 +1,2554 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import struct
23 import struct
23 import zlib
24 import zlib
24
25
25 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
26 from .node import (
27 from .node import (
27 bin,
28 bin,
28 hex,
29 hex,
29 nullid,
30 nullid,
30 nullrev,
31 nullrev,
31 wdirhex,
32 wdirhex,
32 wdirid,
33 wdirid,
33 wdirrev,
34 wdirrev,
34 )
35 )
35 from .i18n import _
36 from .i18n import _
36 from .thirdparty import (
37 from .thirdparty import (
37 attr,
38 attr,
38 )
39 )
39 from . import (
40 from . import (
40 ancestor,
41 ancestor,
41 error,
42 error,
42 mdiff,
43 mdiff,
43 policy,
44 policy,
44 pycompat,
45 pycompat,
45 templatefilters,
46 templatefilters,
46 util,
47 util,
47 )
48 )
48 from .utils import (
49 from .utils import (
49 stringutil,
50 stringutil,
50 )
51 )
51
52
52 parsers = policy.importmod(r'parsers')
53 parsers = policy.importmod(r'parsers')
53
54
54 # Aliased for performance.
55 # Aliased for performance.
55 _zlibdecompress = zlib.decompress
56 _zlibdecompress = zlib.decompress
56
57
57 # revlog header flags
58 # revlog header flags
58 REVLOGV0 = 0
59 REVLOGV0 = 0
59 REVLOGV1 = 1
60 REVLOGV1 = 1
60 # Dummy value until file format is finalized.
61 # Dummy value until file format is finalized.
61 # Reminder: change the bounds check in revlog.__init__ when this is changed.
62 # Reminder: change the bounds check in revlog.__init__ when this is changed.
62 REVLOGV2 = 0xDEAD
63 REVLOGV2 = 0xDEAD
63 FLAG_INLINE_DATA = (1 << 16)
64 FLAG_INLINE_DATA = (1 << 16)
64 FLAG_GENERALDELTA = (1 << 17)
65 FLAG_GENERALDELTA = (1 << 17)
65 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
66 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
66 REVLOG_DEFAULT_FORMAT = REVLOGV1
67 REVLOG_DEFAULT_FORMAT = REVLOGV1
67 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
68 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
68 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
69 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
69 REVLOGV2_FLAGS = REVLOGV1_FLAGS
70 REVLOGV2_FLAGS = REVLOGV1_FLAGS
70
71
71 # revlog index flags
72 # revlog index flags
72 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
73 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
73 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
74 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
74 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
75 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
75 REVIDX_DEFAULT_FLAGS = 0
76 REVIDX_DEFAULT_FLAGS = 0
76 # stable order in which flags need to be processed and their processors applied
77 # stable order in which flags need to be processed and their processors applied
77 REVIDX_FLAGS_ORDER = [
78 REVIDX_FLAGS_ORDER = [
78 REVIDX_ISCENSORED,
79 REVIDX_ISCENSORED,
79 REVIDX_ELLIPSIS,
80 REVIDX_ELLIPSIS,
80 REVIDX_EXTSTORED,
81 REVIDX_EXTSTORED,
81 ]
82 ]
82 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
83 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
83 # bitmark for flags that could cause rawdata content change
84 # bitmark for flags that could cause rawdata content change
84 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
85 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
85
86
86 # max size of revlog with inline data
87 # max size of revlog with inline data
87 _maxinline = 131072
88 _maxinline = 131072
88 _chunksize = 1048576
89 _chunksize = 1048576
89
90
90 RevlogError = error.RevlogError
91 RevlogError = error.RevlogError
91 LookupError = error.LookupError
92 LookupError = error.LookupError
92 CensoredNodeError = error.CensoredNodeError
93 CensoredNodeError = error.CensoredNodeError
93 ProgrammingError = error.ProgrammingError
94 ProgrammingError = error.ProgrammingError
94
95
95 # Store flag processors (cf. 'addflagprocessor()' to register)
96 # Store flag processors (cf. 'addflagprocessor()' to register)
96 _flagprocessors = {
97 _flagprocessors = {
97 REVIDX_ISCENSORED: None,
98 REVIDX_ISCENSORED: None,
98 }
99 }
99
100
101 _mdre = re.compile('\1\n')
102 def parsemeta(text):
103 """return (metadatadict, metadatasize)"""
104 # text can be buffer, so we can't use .startswith or .index
105 if text[:2] != '\1\n':
106 return None, None
107 s = _mdre.search(text, 2).start()
108 mtext = text[2:s]
109 meta = {}
110 for l in mtext.splitlines():
111 k, v = l.split(": ", 1)
112 meta[k] = v
113 return meta, (s + 2)
114
115 def packmeta(meta, text):
116 keys = sorted(meta)
117 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
118 return "\1\n%s\1\n%s" % (metatext, text)
119
100 def addflagprocessor(flag, processor):
120 def addflagprocessor(flag, processor):
101 """Register a flag processor on a revision data flag.
121 """Register a flag processor on a revision data flag.
102
122
103 Invariant:
123 Invariant:
104 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
124 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
105 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
125 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
106 - Only one flag processor can be registered on a specific flag.
126 - Only one flag processor can be registered on a specific flag.
107 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
127 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
108 following signatures:
128 following signatures:
109 - (read) f(self, rawtext) -> text, bool
129 - (read) f(self, rawtext) -> text, bool
110 - (write) f(self, text) -> rawtext, bool
130 - (write) f(self, text) -> rawtext, bool
111 - (raw) f(self, rawtext) -> bool
131 - (raw) f(self, rawtext) -> bool
112 "text" is presented to the user. "rawtext" is stored in revlog data, not
132 "text" is presented to the user. "rawtext" is stored in revlog data, not
113 directly visible to the user.
133 directly visible to the user.
114 The boolean returned by these transforms is used to determine whether
134 The boolean returned by these transforms is used to determine whether
115 the returned text can be used for hash integrity checking. For example,
135 the returned text can be used for hash integrity checking. For example,
116 if "write" returns False, then "text" is used to generate hash. If
136 if "write" returns False, then "text" is used to generate hash. If
117 "write" returns True, that basically means "rawtext" returned by "write"
137 "write" returns True, that basically means "rawtext" returned by "write"
118 should be used to generate hash. Usually, "write" and "read" return
138 should be used to generate hash. Usually, "write" and "read" return
119 different booleans. And "raw" returns a same boolean as "write".
139 different booleans. And "raw" returns a same boolean as "write".
120
140
121 Note: The 'raw' transform is used for changegroup generation and in some
141 Note: The 'raw' transform is used for changegroup generation and in some
122 debug commands. In this case the transform only indicates whether the
142 debug commands. In this case the transform only indicates whether the
123 contents can be used for hash integrity checks.
143 contents can be used for hash integrity checks.
124 """
144 """
125 if not flag & REVIDX_KNOWN_FLAGS:
145 if not flag & REVIDX_KNOWN_FLAGS:
126 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
146 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
127 raise ProgrammingError(msg)
147 raise ProgrammingError(msg)
128 if flag not in REVIDX_FLAGS_ORDER:
148 if flag not in REVIDX_FLAGS_ORDER:
129 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
149 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
130 raise ProgrammingError(msg)
150 raise ProgrammingError(msg)
131 if flag in _flagprocessors:
151 if flag in _flagprocessors:
132 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
152 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
133 raise error.Abort(msg)
153 raise error.Abort(msg)
134 _flagprocessors[flag] = processor
154 _flagprocessors[flag] = processor
135
155
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

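# Illustrative sketch only: the first index field packs a 48-bit byte offset
# and a 16-bit flag field into a single integer, so the helpers above invert
# each other. REVIDX_ISCENSORED stands in for any known flag here:
#
#   q = offset_type(4096, REVIDX_ISCENSORED)
#   assert getoffset(q) == 4096
#   assert gettype(q) == REVIDX_ISCENSORED
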
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        if p1 < p2:
            a = p1
            b = p2
        else:
            a = p2
            b = p1
        s = hashlib.sha1(a)
        s.update(b)
    s.update(text)
    return s.digest()

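# Illustrative sketch only: for 20-byte parent nodes p1 and p2 and a
# bytestring text, the node id is sha1(min(p1, p2) + max(p1, p2) + text);
# the _nullhash copy above merely precomputes the common single-parent case
# (nullid sorts first because it is twenty NUL bytes):
#
#   a, b = sorted([p1, p2])
#   assert hash(text, p1, p2) == hashlib.sha1(a + b + text).digest()
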
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1

    return revs[startidx:endidx]

def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should each be read in one go.
    Assume that revs are sorted.
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    startbyte = start(revs[0])
    endbyte = start(revs[-1]) + length(revs[-1])
    readdata = deltachainspan = endbyte - startbyte

    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

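# A standalone sketch of the slicing heuristic above; the stub class fakes
# only the attributes _slicechunk touches, and all numbers are invented.
# Three revisions with a large hole between revs 1 and 2 split into two
# read groups once dropping the gap makes the density acceptable:
#
#   class _fakerevlog(object):
#       _srmingapsize = 10
#       _srdensitythreshold = 0.5
#       def start(self, rev):
#           return {0: 0, 1: 100, 2: 1000}[rev]
#       def length(self, rev):
#           return 100
#
#   assert list(_slicechunk(_fakerevlog(), [0, 1, 2])) == [[0, 1], [2]]
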
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()

class _deltacomputer(object):
    def __init__(self, revlog):
        self.revlog = revlog

    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides candidate revisions worth diffing against, grouped from the
        cheapest to the most expensive to try.
        """
        revlog = self.revlog
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if revlog._generaldelta:
                # exclude the base already tested lazily above, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not revlog._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                yield parents

            if prev not in tested:
                # the other approaches failed; try against prev to hopefully
                # save us a fulltext.
                yield (prev,)

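    # Illustrative note only: for a merge revision in a generaldelta revlog
    # with a usable cached delta, the generator above yields candidate
    # groups in this order, stopping as soon as one produces a good delta:
    #
    #   (cachedelta base,)  ->  (closer parent,)  ->  (prev,)
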
    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                # no delta for rawtext-changing revs (see "candelta" for why)
                if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                    continue
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent nodes of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

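# Illustrative sketch only: each v0 entry is a fixed 76 bytes (four 4-byte
# big-endian ints followed by three 20-byte nodes); p1node, p2node and node
# stand for arbitrary 20-byte node ids:
#
#   raw = indexformatv0_pack(0, 11, 0, 0, p1node, p2node, node)
#   assert len(raw) == indexformatv0.size == 76
#   assert indexformatv0_unpack(raw)[6] == node
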
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

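# Illustrative note only: a v1 ("ng") entry is a fixed 64 bytes: one 8-byte
# offset/flags field (see offset_type above), six 4-byte ints, a 20-byte
# node and 12 bytes of padding. For the entry of rev 0, packentry below
# overwrites the leading 4 bytes with the revlog version header:
#
#   assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64
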
class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

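        # Illustrative note only: the parsing above is driven by a plain
        # dict attached to the opener; a sketch of the shape it expects,
        # with invented values (only keys read above are meaningful):
        #
        #   opener.options = {
        #       'revlogv1': True,         # select REVLOGV1 rather than v0
        #       'generaldelta': True,     # deltas may use any base revision
        #       'chunkcachesize': 65536,  # must be a power of 2 (see below)
        #       'with-sparse-read': True,
        #   }
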
        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
                if len(indexdata) > 0:
                    v = versionformat_unpack(indexdata[:4])[0]
                    self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

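    # Illustrative note only: ``revs`` is inclusive of ``stop`` and walks
    # backwards when start > stop, e.g. for a revlog ``rl`` with at least
    # six revisions:
    #
    #   list(rl.revs(2, 5))  # -> [2, 3, 4, 5]
    #   list(rl.revs(5, 2))  # -> [5, 4, 3, 2]
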
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

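    # Illustrative note only: with generaldelta each step follows the
    # recorded base rev (index[rev][3]); without it the base is implicitly
    # rev - 1. A sketch of consuming the result (``rl`` is any revlog):
    #
    #   chain, stopped = rl._deltachain(rev)
    #   # chain[0] is a fulltext rev, or, when stopped is True, a rev whose
    #   # delta base is ``stoprev``; the remaining revs are deltas applied
    #   # in order.
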
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

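    # Hypothetical usage sketch (the repo and variable names here are
    # assumptions, not from this file): during pull discovery, 'common' is
    # what both sides already share and 'heads' is what the remote
    # advertises, so the result is exactly the set of nodes a pull must
    # transfer:
    #
    #     missing = repo.changelog.findmissing(common=commonnodes,
    #                                          heads=remoteheads)
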
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

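    # Worked example (illustrative only, assuming 'rl' is a revlog with a
    # linear history 0..4): the path between rev 1 and rev 3 is inclusive of
    # both endpoints, and the reachable endpoints are echoed back:
    #
    #     nodes, outroots, outheads = rl.nodesbetween([rl.node(1)],
    #                                                 [rl.node(3)])
    #     # nodes    == [rl.node(1), rl.node(2), rl.node(3)]
    #     # outroots == [rl.node(1)]; outheads == [rl.node(3)]
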
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

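    # Illustrative calls (assuming 'rl' is a revlog instance): 'start'
    # restricts the result to heads descending from that node, while nodes
    # in 'stop' are treated as childless, cutting the walk short:
    #
    #     rl.heads()                  # every topological head
    #     rl.heads(start=basenode)    # only heads descended from basenode
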
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

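    # Sketch of the search strategy above (illustrative): starting from
    # max(6, minlength) hex digits, the loop shrinks the prefix while it
    # stays unambiguous and grows it again once _partialmatch reports a
    # collision. For a node whose 4-digit prefix is unique but whose 3-digit
    # prefix collides, it tests lengths 6, 5, 4, then 3 (ambiguous), and
    # returns the 4-digit prefix.
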
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
            self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

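    # Window arithmetic from _readsegment, as a hedged worked example
    # assuming self._chunkcachesize == 65536: a request at offset=70000,
    # length=100 yields realoffset = 70000 & ~65535 = 65536 and
    # reallength = ((70000 + 100 + 65536) & ~65535) - 65536 = 65536,
    # i.e. one aligned 64KiB read that warms the cache around the request.
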
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

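    # Layout note (illustrative): in an inline revlog the index entries and
    # data chunks are interleaved in a single file, so a revision's data
    # starts (rev + 1) index-entry sizes later than its logical offset;
    # that is what the (startrev + 1) * self._io.size adjustments above add.
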
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

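    # Delta-chain sketch (illustrative): with generaldelta a revision's base
    # is whatever the index recorded; without it, the base is implicitly
    # rev - 1. Materializing a revision therefore walks deltaparent() back
    # to a full snapshot and patches forward, which is what revision() below
    # does through _deltachain() and _chunks().
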
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable to allow
        non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

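    # Shape of a _flagprocessors entry, as a hedged sketch (the flag and
    # helper names here are hypothetical; real processors are registered
    # elsewhere, e.g. via the module-level addflagprocessor()):
    #
    #     def readfn(rl, text):
    #         return transformedtext, True   # (text, validatehash)
    #
    #     _flagprocessors[REVIDX_SOMEFLAG] = (readfn, writefn, rawfn)
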
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

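# --- Illustrative aside (not part of the original file): a sketch of a
# typical addrevision() caller. ``rl`` and ``tr`` are assumed to be an
# open revlog and an active transaction; this helper is not a mercurial
# API, just an example of the calling convention.
def _example_append_fulltext(rl, tr, data, linkrev, p1, p2):
    # When ``node`` is omitted, addrevision() hashes (data, p1, p2)
    # itself and returns the resulting node; re-adding a node that is
    # already present simply returns it unchanged.
    return rl.addrevision(data, tr, linkrev, p1, p2)
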
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') %
                                  stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

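# --- Illustrative aside (not part of the original file): the one-byte
# chunk header convention that compress() and decompress() share. The
# routing for engines other than zlib is left abstract, since the exact
# header bytes depend on which compression engines are registered.
def _example_classify_chunk(chunk):
    if not chunk:
        return 'empty'
    t = chunk[0:1]
    if t == 'x':
        return 'zlib-compressed'
    if t == 'u':
        return 'stored uncompressed; strip the header byte'
    if t == '\0':
        return 'raw data that happened to start with NUL'
    return 'routed to a registered engine for header %r' % t

assert _example_classify_chunk('uhello') == (
    'stored uncompressed; strip the header byte')
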
    def _isgooddeltainfo(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if d is None:
            return False

        # - 'd.distance' is the distance from the base revision -- bounding it
        #   limits the amount of I/O we need to do.
        # - 'd.compresseddeltalen' is the sum of the total size of deltas we
        #   need to apply -- bounding it limits the amount of CPU we consume.

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = d.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (d.distance > maxdist or d.deltalen > textlen or
            d.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and d.chainlen > self._maxchainlen)):
            return False

        return True

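# --- Illustrative aside (not part of the original file): the bounds
# enforced by _isgooddeltainfo(), restated as a standalone predicate and
# exercised with made-up numbers.
def _example_isgooddelta(distance, deltalen, compresseddeltalen, chainlen,
                         textlen, maxdeltachainspan=0, maxchainlen=0):
    maxdist = maxdeltachainspan or distance # unset span disables the test
    maxdist = max(maxdist, textlen * 4)
    if distance > maxdist:
        return False # reading the chain span would cost too much I/O
    if deltalen > textlen:
        return False # storing the full text would be smaller
    if compresseddeltalen > textlen * 2:
        return False # applying the delta chain would cost too much CPU
    if maxchainlen and chainlen > maxchainlen:
        return False
    return True

# A 200-byte delta 3000 bytes from its base is fine for a 1000-byte text...
assert _example_isgooddelta(3000, 200, 500, 5, textlen=1000)
# ...but a 1500-byte delta is rejected: the full text would be smaller.
assert not _example_isgooddelta(3000, 1500, 500, 5, textlen=1000)
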
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a default _deltacomputer
        will be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = None
        else:
            deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

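# --- Illustrative aside (not part of the original file): the a+ mode
# workaround described above, in isolation.
import os

def _example_append_safely(fh, payload):
    # Force the read->write transition to land at EOF before writing;
    # without this, some platforms may write at a stale file position.
    fh.seek(0, os.SEEK_END)
    fh.write(payload)
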
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the
        rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

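# --- Illustrative aside (not part of the original file): the shape of
# the entries addgroup() consumes, matching the tuple unpacked in the
# loop above. The values passed in would be real nodes and patch data.
def _example_group_entry(node, p1, p2, linknode, deltabase, delta, flags=0):
    # node      - node being added
    # p1, p2    - parent nodes (must already be in the revlog)
    # linknode  - changelog node that linkmapper() turns into a linkrev
    # deltabase - node the delta applies against
    # delta     - binary patch text
    # flags     - revision flags; 0 falls back to REVIDX_DEFAULT_FLAGS
    return (node, p1, p2, linknode, deltabase, delta, flags)
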
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

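# --- Illustrative aside (not part of the original file): the walk above
# traced on a toy linear history where linkrev(rev) == rev. The class is
# an assumption built only for this example.
class _ExampleLinearLog(object):
    def __init__(self, n):
        self._n = n
    def __len__(self):
        return self._n
    def headrevs(self):
        return [self._n - 1]
    def linkrev(self, rev):
        return rev
    def parentrevs(self, rev):
        return (rev - 1, -1) # linear chain; -1 is nullrev

# With 5 revisions and minlink=3, the walk starts at head 4, pops revs 4
# and 3 (both linkrevs >= 3, so neither is "broken"), and stops once no
# pending linkrev is >= minlink: the result is strippoint 3 with an
# empty brokenrevs set, i.e. truncate from rev 3 and nothing else breaks.
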
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        When unspecified, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd

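# --- Illustrative aside (not part of the original file): a sketch of
# using clone() to force delta recomputation, e.g. after a delta
# algorithm change. The opener, the '.new' path suffix, and the use of
# ``repo.transaction`` as a context manager are assumptions for this
# example, not a prescribed recipe.
def _example_recompute_deltas(repo, opener, path):
    # ``revlog`` here is the class defined above.
    src = revlog(opener, path)
    dst = revlog(opener, path + '.new')
    with repo.transaction('clonerevlog') as tr:
        src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
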
@@ -1,690 +1,689 @@
# simplestorerepo.py - Extension that swaps in alternate repository storage.
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# To use this with the test suite:
#
# $ HGREPOFEATURES="simplestore" ./run-tests.py \
#   --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py

from __future__ import absolute_import

import stat

from mercurial.i18n import _
from mercurial.node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from mercurial.thirdparty import (
    cbor,
)
from mercurial.thirdparty.zope import (
    interface as zi,
)
from mercurial import (
    ancestor,
    bundlerepo,
    error,
    extensions,
-   filelog,
    localrepo,
    mdiff,
    pycompat,
    repository,
    revlog,
    store,
    verify,
)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

REQUIREMENT = 'testonly-simplestore'

def validatenode(node):
    if isinstance(node, int):
        raise ValueError('expected node; got int')

    if len(node) != 20:
        raise ValueError('expected 20 byte node')

def validaterev(rev):
    if not isinstance(rev, int):
        raise ValueError('expected int')

@zi.implementer(repository.ifilestorage)
class filestorage(object):
    """Implements storage for a tracked path.

    Data is stored in the VFS in a directory corresponding to the tracked
    path.

    Index data is stored in an ``index`` file using CBOR.

    Fulltext data is stored in files having names of the node.
    """

    def __init__(self, svfs, path):
        self._svfs = svfs
        self._path = path

        self._storepath = b'/'.join([b'data', path])
        self._indexpath = b'/'.join([self._storepath, b'index'])

        indexdata = self._svfs.tryread(self._indexpath)
        if indexdata:
            indexdata = cbor.loads(indexdata)

        self._indexdata = indexdata or []
        self._indexbynode = {}
        self._indexbyrev = {}
        self.index = []
        self._refreshindex()

        # This is used by changegroup code :/
        self._generaldelta = True
        self.storedeltachains = False

        self.version = 1

    def _refreshindex(self):
        self._indexbynode.clear()
        self._indexbyrev.clear()
        self.index = []

        for i, entry in enumerate(self._indexdata):
            self._indexbynode[entry[b'node']] = entry
            self._indexbyrev[i] = entry

        self._indexbynode[nullid] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        self._indexbyrev[nullrev] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        for i, entry in enumerate(self._indexdata):
            p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))

            # start, length, rawsize, chainbase, linkrev, p1, p2, node
            self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
                               entry[b'node']))

        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

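# --- Illustrative aside (not part of the original file): the shape of a
# single entry in the CBOR-encoded ``index`` file that _refreshindex()
# consumes, with placeholder node values.
_example_index_entry = {
    b'node': b'\x11' * 20, # 20-byte binary node of this revision
    b'p1': b'\x00' * 20,   # first parent (nullid here)
    b'p2': b'\x00' * 20,   # second parent
    b'linkrev': 0,         # revision number of the linked changelog entry
    b'flags': 0,           # revlog flags, e.g. revlog.REVIDX_ISCENSORED
}
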
    def __len__(self):
        return len(self._indexdata)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        step = 1
        if stop is not None:
            if start > stop:
                step = -1

            stop += step
        else:
            stop = len(self)

        return range(start, stop, step)

    def parents(self, node):
        validatenode(node)

        if node not in self._indexbynode:
            raise KeyError('unknown node')

        entry = self._indexbynode[node]

        return entry[b'p1'], entry[b'p2']

    def parentrevs(self, rev):
        p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
        return self.rev(p1), self.rev(p2)

    def rev(self, node):
        validatenode(node)

        try:
            self._indexbynode[node]
        except KeyError:
            raise error.LookupError(node, self._indexpath, _('no node'))

        for rev, entry in self._indexbyrev.items():
            if entry[b'node'] == node:
                return rev

        raise error.ProgrammingError('this should not occur')

    def node(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'node']

    def lookup(self, node):
        if isinstance(node, int):
            return self.node(node)

        if len(node) == 20:
            self.rev(node)
            return node

        try:
            rev = int(node)
            if '%d' % rev != node:
                raise ValueError

            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError

            return self.node(rev)
        except (ValueError, OverflowError):
            pass

        if len(node) == 40:
            try:
                rawnode = bin(node)
                self.rev(rawnode)
                return rawnode
            except TypeError:
                pass

        raise error.LookupError(node, self._path, _('invalid lookup input'))

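# --- Illustrative aside (not part of the original file): the inputs
# lookup() accepts, for a store ``fs`` holding one revision with binary
# node ``n``. These calls are all equivalent (the string form relies on
# Python 2 bytes/str semantics, which this file targets):
#
#     fs.lookup(0)        # revision number
#     fs.lookup(b'0')     # revision number spelled as a string
#     fs.lookup(n)        # 20-byte binary node
#     fs.lookup(hex(n))   # 40-character hex node
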
    def linkrev(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'linkrev']

    def flags(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'flags']

    def deltaparent(self, rev):
        validaterev(rev)

        p1node = self.parents(self.node(rev))[0]
        return self.rev(p1node)

    def candelta(self, baserev, rev):
        validaterev(baserev)
        validaterev(rev)

        if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False

        return True

    def rawsize(self, rev):
        validaterev(rev)
        node = self.node(rev)
        return len(self.revision(node, raw=True))

    def _processflags(self, text, flags, operation, raw=False):
        if flags == 0:
            return text, True

        if flags & ~revlog.REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~revlog.REVIDX_KNOWN_FLAGS))

        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = revlog.REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in revlog._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise revlog.RevlogError(message)

                processor = revlog._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != revlog.hash(text, p1, p2):
            raise error.RevlogError(_("integrity check failed on %s") %
                                    self._path)

    def revision(self, node, raw=False):
        validatenode(node)

        if node == nullid:
            return b''

        rev = self.rev(node)
        flags = self.flags(rev)

        path = b'/'.join([self._storepath, hex(node)])
        rawtext = self._svfs.read(path)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def read(self, node):
        validatenode(node)

        revision = self.revision(node)

        if not revision.startswith(b'\1\n'):
            return revision

        start = revision.index(b'\1\n', 2)
        return revision[start + 2:]

    def renamed(self, node):
        validatenode(node)

        if self.parents(node)[0] != nullid:
            return False

        fulltext = self.revision(node)
-       m = filelog.parsemeta(fulltext)[0]
+       m = revlog.parsemeta(fulltext)[0]

        if m and 'copy' in m:
            return m['copy'], bin(m['copyrev'])

        return False

336 def cmp(self, node, text):
335 def cmp(self, node, text):
337 validatenode(node)
336 validatenode(node)
338
337
339 t = text
338 t = text
340
339
341 if text.startswith(b'\1\n'):
340 if text.startswith(b'\1\n'):
342 t = b'\1\n\1\n' + text
341 t = b'\1\n\1\n' + text
343
342
344 p1, p2 = self.parents(node)
343 p1, p2 = self.parents(node)
345
344
346 if revlog.hash(t, p1, p2) == node:
345 if revlog.hash(t, p1, p2) == node:
347 return False
346 return False
348
347
349 if self.iscensored(self.rev(node)):
348 if self.iscensored(self.rev(node)):
350 return text != b''
349 return text != b''
351
350
352 if self.renamed(node):
351 if self.renamed(node):
353 t2 = self.read(node)
352 t2 = self.read(node)
354 return t2 != text
353 return t2 != text
355
354
356 return True
355 return True
357
356
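    # Note on ``cmp()`` above: a fulltext that itself begins with the
    # ``\1\n`` marker is stored behind an empty metadata block (hence the
    # ``b'\1\n\1\n'`` prefix) so it cannot be misparsed as metadata; the
    # hash must therefore be computed over the prefixed form.
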
    def size(self, rev):
        validaterev(rev)

        node = self._indexbyrev[rev][b'node']

        if self.renamed(node):
            return len(self.read(node))

        if self.iscensored(rev):
            return 0

        return len(self.revision(node))

    def iscensored(self, rev):
        validaterev(rev)

        return self.flags(rev) & revlog.REVIDX_ISCENSORED

    def commonancestorsheads(self, a, b):
        validatenode(a)
        validatenode(b)

        a = self.rev(a)
        b = self.rev(b)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        # This is a copy of revlog.descendants()
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    # Required by verify.
    def files(self):
        entries = self._svfs.listdir(self._storepath)

        # Strip out undo.backup.* files created as part of transaction
        # recording.
        entries = [f for f in entries if not f.startswith('undo.backup.')]

        return [b'/'.join((self._storepath, f)) for f in entries]

    # Required by verify.
    def checksize(self):
        return 0, 0

    def add(self, text, meta, transaction, linkrev, p1, p2):
        if meta or text.startswith(b'\1\n'):
            text = revlog.packmeta(meta, text)

        return self.addrevision(text, transaction, linkrev, p1, p2)

    def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
                    flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
        validatenode(p1)
        validatenode(p2)

        if flags:
            node = node or revlog.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        node = node or revlog.hash(text, p1, p2)

        if node in self._indexbynode:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
                                    flags)

    def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
        transaction.addbackup(self._indexpath)

        path = b'/'.join([self._storepath, hex(node)])

        self._svfs.write(path, rawtext)

        self._indexdata.append({
            b'node': node,
            b'p1': p1,
            b'p2': p2,
            b'linkrev': link,
            b'flags': flags,
        })

        self._reflectindexupdate()

        return node

    def _reflectindexupdate(self):
        self._refreshindex()
        self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))

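    # For illustration: the index written above is a CBOR-encoded list of
    # per-revision dicts. A hedged round-trip sketch, assuming the vendored
    # ``cbor`` module exposes ``dumps()``/``loads()`` (the entry values are
    # made up):
    #
    #   entry = {b'node': b'\x00' * 20, b'p1': nullid, b'p2': nullid,
    #            b'linkrev': 0, b'flags': 0}
    #   blob = cbor.dumps([entry])
    #   assert cbor.loads(blob)[0][b'linkrev'] == 0
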
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        nodes = []

        transaction.addbackup(self._indexpath)

        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            linkrev = linkmapper(linknode)
            flags = flags or revlog.REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self._indexbynode:
                continue

            # Need to resolve the fulltext from the delta base.
            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
            else:
                text = mdiff.patch(self.revision(deltabase), delta)

            self._addrawrevision(node, text, transaction, linkrev, p1, p2,
                                 flags)

            if addrevisioncb:
                addrevisioncb(self, node)

        return nodes

    def revdiff(self, rev1, rev2):
        validaterev(rev1)
        validaterev(rev2)

        node1 = self.node(rev1)
        node2 = self.node(rev2)

        return mdiff.textdiff(self.revision(node1, raw=True),
                              self.revision(node2, raw=True))

    def headrevs(self):
        # Assume all revisions are heads by default.
        revishead = {rev: True for rev in self._indexbyrev}

        for rev, entry in self._indexbyrev.items():
            # Unset head flag for all seen parents.
            revishead[self.rev(entry[b'p1'])] = False
            revishead[self.rev(entry[b'p2'])] = False

        return [rev for rev, ishead in sorted(revishead.items())
                if ishead]

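    # Worked example (hypothetical): with revs 1 and 2 both having rev 0 as
    # their first parent, every rev starts out flagged as a head, rev 0 is
    # then unflagged when it is seen as a parent, and headrevs() returns
    # [1, 2].
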
    def heads(self, start=None, stop=None):
        # This is copied from revlog.py.
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def getstrippoint(self, minlink):
        # This is largely a copy of revlog.getstrippoint().
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

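    # Worked example (hypothetical): for a linear chain of revs [0, 1, 2]
    # with linkrevs [0, 1, 2] and minlink=1, the walk pops rev 2 (linkrev 2
    # >= minlink) and then rev 1 (linkrev 1 >= minlink), after which no
    # linkrev >= minlink remains, so getstrippoint(1) returns (1, set()):
    # strip from rev 1, with no broken revisions.
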
    def strip(self, minlink, transaction):
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()

def issimplestorefile(f, kind, st):
    if kind != stat.S_IFREG:
        return False

    if store.isrevlog(f, kind, st):
        return False

    # Ignore transaction undo files.
    if f.startswith('undo.'):
        return False

    # Otherwise assume it belongs to the simple store.
    return True

class simplestore(store.encodedstore):
    def datafiles(self):
        for x in super(simplestore, self).datafiles():
            yield x

        # Supplement with non-revlog files.
        extrafiles = self._walk('data', True, filefilter=issimplestorefile)

        for unencoded, encoded, size in extrafiles:
            try:
                unencoded = store.decodefilename(unencoded)
            except KeyError:
                unencoded = None

            yield unencoded, encoded, size

def reposetup(ui, repo):
    if not repo.local():
        return

    if isinstance(repo, bundlerepo.bundlerepository):
        raise error.Abort(_('cannot use simple store with bundlerepo'))

    class simplestorerepo(repo.__class__):
        def file(self, f):
            return filestorage(self.svfs, f)

    repo.__class__ = simplestorerepo

def featuresetup(ui, supported):
    supported.add(REQUIREMENT)

def newreporequirements(orig, repo):
    """Modifies default requirements for new repos to use the simple store."""
    requirements = orig(repo)

    # These requirements are only used to affect creation of the store
    # object. We have our own store. So we can remove them.
    # TODO do this once we feel like taking the test hit.
    #if 'fncache' in requirements:
    #    requirements.remove('fncache')
    #if 'dotencode' in requirements:
    #    requirements.remove('dotencode')

    requirements.add(REQUIREMENT)

    return requirements

def makestore(orig, requirements, path, vfstype):
    if REQUIREMENT not in requirements:
        return orig(requirements, path, vfstype)

    return simplestore(path, vfstype)

def verifierinit(orig, self, *args, **kwargs):
    orig(self, *args, **kwargs)

    # We don't care that files in the store don't align with what is
    # advertised. So suppress these warnings.
    self.warnorphanstorefiles = False

def extsetup(ui):
    localrepo.featuresetupfuncs.add(featuresetup)

    extensions.wrapfunction(localrepo, 'newreporequirements',
                            newreporequirements)
    extensions.wrapfunction(store, 'store', makestore)
    extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
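
# Usage sketch (hypothetical path): load this module as an extension so new
# repositories are created with the simple store, e.g. in an hgrc:
#
#   [extensions]
#   simplestore = /path/to/simplestorerepo.py
#
# Repositories created while it is enabled carry the requirement added by
# featuresetup()/newreporequirements() above, so they refuse to open without
# the extension loaded.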