censor: use context manager for lock management
Matt Harbison
r38460:e219e355 default
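
This change drops the manual `wlock`/`lock` bookkeeping in the `censor()` command and instead uses the lock objects returned by `repo.wlock()` and `repo.lock()` as context managers, so both locks are released automatically (in reverse order of acquisition) even if `_docensor()` raises. A minimal sketch of the pattern in generic Python, using a hypothetical `fakelock` stand-in rather than Mercurial's real lock class:

```python
from contextlib import contextmanager

@contextmanager
def fakelock(name):
    # Stand-in for repo.wlock()/repo.lock(): acquire on enter, release on exit.
    print('acquire %s' % name)
    try:
        yield name
    finally:
        print('release %s' % name)

# Contexts in a single `with` statement are entered left to right and exited
# right to left, matching the old code's lockmod.release(lock, wlock) ordering:
with fakelock('wlock'), fakelock('lock'):
    print('both locks held; safe to rewrite the filelog')
```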
@@ -1,189 +1,183 @@
 # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
 #
 # This extension enables removal of file content at a given revision,
 # rewriting the data/metadata of successive revisions to preserve revision log
 # integrity.
 
 """erase file content at a given revision
 
 The censor command instructs Mercurial to erase all content of a file at a given
 revision *without updating the changeset hash.* This allows existing history to
 remain valid while preventing future clones/pulls from receiving the erased
 data.
 
 Typical uses for censor are due to security or legal requirements, including::
 
  * Passwords, private keys, cryptographic material
  * Licensed data/code/libraries for which the license has expired
  * Personally Identifiable Information or other private data
 
 Censored nodes can interrupt mercurial's typical operation whenever the excised
 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
 simply fail when asked to produce censored data. Others, like ``hg verify`` and
 ``hg update``, must be capable of tolerating censored data to continue to
 function in a meaningful way. Such commands only tolerate censored file
 revisions if they are allowed by the "censor.policy=ignore" config option.
 """
 
 from __future__ import absolute_import
 
 from mercurial.i18n import _
 from mercurial.node import short
 
 from mercurial import (
     error,
-    lock as lockmod,
     registrar,
     revlog,
     scmutil,
     util,
 )
 
 cmdtable = {}
 command = registrar.command(cmdtable)
 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
 # be specifying the version(s) of Mercurial they are tested with, or
 # leave the attribute unspecified.
 testedwith = 'ships-with-hg-core'
 
 @command('censor',
     [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
      ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
     _('-r REV [-t TEXT] [FILE]'))
 def censor(ui, repo, path, rev='', tombstone='', **opts):
-    wlock = lock = None
-    try:
-        wlock = repo.wlock()
-        lock = repo.lock()
+    with repo.wlock(), repo.lock():
         return _docensor(ui, repo, path, rev, tombstone, **opts)
-    finally:
-        lockmod.release(lock, wlock)
 
 def _docensor(ui, repo, path, rev='', tombstone='', **opts):
     if not path:
         raise error.Abort(_('must specify file path to censor'))
     if not rev:
         raise error.Abort(_('must specify revision to censor'))
 
     wctx = repo[None]
 
     m = scmutil.match(wctx, (path,))
     if m.anypats() or len(m.files()) != 1:
         raise error.Abort(_('can only specify an explicit filename'))
     path = m.files()[0]
     flog = repo.file(path)
     if not len(flog):
         raise error.Abort(_('cannot censor file with no history'))
 
     rev = scmutil.revsingle(repo, rev, rev).rev()
     try:
         ctx = repo[rev]
     except KeyError:
         raise error.Abort(_('invalid revision identifier %s') % rev)
 
     try:
         fctx = ctx.filectx(path)
     except error.LookupError:
         raise error.Abort(_('file does not exist at revision %s') % rev)
 
     fnode = fctx.filenode()
     headctxs = [repo[c] for c in repo.heads()]
     heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
     if heads:
         headlist = ', '.join([short(c.node()) for c in heads])
         raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
                           hint=_('clean/delete and commit first'))
 
     wp = wctx.parents()
     if ctx.node() in [p.node() for p in wp]:
         raise error.Abort(_('cannot censor working directory'),
                           hint=_('clean/delete/update first'))
 
     flogv = flog.version & 0xFFFF
     if flogv != revlog.REVLOGV1:
         raise error.Abort(
             _('censor does not support revlog version %d') % (flogv,))
 
     tombstone = revlog.packmeta({"censored": tombstone}, "")
 
     crev = fctx.filerev()
 
     if len(tombstone) > flog.rawsize(crev):
         raise error.Abort(_(
             'censor tombstone must be no longer than censored data'))
 
     # Using two files instead of one makes it easy to rewrite entry-by-entry
     idxread = repo.svfs(flog.indexfile, 'r')
     idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
     if flog.version & revlog.FLAG_INLINE_DATA:
         dataread, datawrite = idxread, idxwrite
     else:
         dataread = repo.svfs(flog.datafile, 'r')
         datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
 
     # Copy all revlog data up to the entry to be censored.
     rio = revlog.revlogio()
     offset = flog.start(crev)
 
     for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
         idxwrite.write(chunk)
     for chunk in util.filechunkiter(dataread, limit=offset):
         datawrite.write(chunk)
 
     def rewriteindex(r, newoffs, newdata=None):
         """Rewrite the index entry with a new data offset and optional new data.
 
         The newdata argument, if given, is a tuple of three positive integers:
         (new compressed, new uncompressed, added flag bits).
         """
         offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
         flags = revlog.gettype(offlags)
         if newdata:
             comp, uncomp, nflags = newdata
             flags |= nflags
         offlags = revlog.offset_type(newoffs, flags)
         e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
         idxwrite.write(rio.packentry(e, None, flog.version, r))
         idxread.seek(rio.size, 1)
 
     def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
         """Write the given full text to the filelog with the given data offset.
 
         Returns:
             The integer number of data bytes written, for tracking data offsets.
         """
         flag, compdata = flog.compress(data)
         newcomp = len(flag) + len(compdata)
         rewriteindex(r, offs, (newcomp, len(data), nflags))
         datawrite.write(flag)
         datawrite.write(compdata)
         dataread.seek(flog.length(r), 1)
         return newcomp
 
     # Rewrite censored revlog entry with (padded) tombstone data.
     pad = ' ' * (flog.rawsize(crev) - len(tombstone))
     offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
 
     # Rewrite all following filelog revisions fixing up offsets and deltas.
     for srev in xrange(crev + 1, len(flog)):
         if crev in flog.parentrevs(srev):
             # Immediate children of censored node must be re-added as fulltext.
             try:
                 revdata = flog.revision(srev)
             except error.CensoredNodeError as e:
                 revdata = e.tombstone
             dlen = rewrite(srev, offset, revdata)
         else:
             # Copy any other revision data verbatim after fixing up the offset.
             rewriteindex(srev, offset)
             dlen = flog.length(srev)
             for chunk in util.filechunkiter(dataread, limit=dlen):
                 datawrite.write(chunk)
         offset += dlen
 
     idxread.close()
     idxwrite.close()
     if dataread is not idxread:
         dataread.close()
     datawrite.close()
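
For context on the docstring shown above: the extension ships with Mercurial, and the "censor.policy=ignore" option it mentions is ordinary hgrc configuration. A minimal sketch of an hgrc that enables the extension and relaxes the policy (the `[extensions]` stanza is an assumption about typical setup, not part of this changeset):

```ini
[extensions]
censor =

[censor]
policy = ignore
```

With that in place, the command registered in the diff is invoked as `hg censor -r REV [-t TEXT] FILE`, per the synopsis in the `@command` registration.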