##// END OF EJS Templates
censor: add censor command to hgext with basic client-side tests...
Mike Edgar -
r24347:1bcfecbb default
parent child Browse files
Show More
@@ -0,0 +1,168 b''
1 # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
2 #
3 # This extension enables removal of file content at a given revision,
4 # rewriting the data/metadata of successive revisions to preserve revision log
5 # integrity.
6
7 """erase file content at a given revision
8
9 The censor command instructs Mercurial to erase all content of a file at a given
10 revision *without updating the changeset hash.* This allows existing history to
11 remain valid while preventing future clones/pulls from receiving the erased
12 data.
13
14 Typical uses for censor are due to security or legal requirements, including::
15
16 * Passwords, private keys, cryptographic material
17 * Licensed data/code/libraries for which the license has expired
18 * Personally Identifiable Information or other private data
19
20 Censored file revisions are listed in a tracked file called .hgcensored stored
21 in the repository root. The censor command adds an entry to the .hgcensored file
22 in the working directory and commits it (much like ``hg tag`` and .hgtags). The
23 censored file data is then replaced with a pointer to the new commit, enabling
24 verification.
25
26 Censored nodes can interrupt mercurial's typical operation whenever the excised
27 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
28 simply fail when asked to produce censored data. Others, like ``hg verify`` and
29 ``hg update``, must be capable of tolerating censored data to continue to
30 function in a meaningful way. Such commands only tolerate censored file
31 revisions if they are allowed by the policy specified by the "censor.policy"
32 config option.
33 """
34
35 from mercurial.node import short
36 from mercurial import cmdutil, error, filelog, revlog, scmutil, util
37 from mercurial.i18n import _
38
39 cmdtable = {}
40 command = cmdutil.command(cmdtable)
41 testedwith = 'internal'
42
# NOTE: censor() deliberately carries no docstring. Mercurial presents a
# command function's docstring as its ``hg help`` text, so adding one would
# change user-visible output; maintainer documentation lives in comments.
#
# censor(ui, repo, path, rev='', tombstone='', **opts)
#   path      -- repository file whose content should be erased
#   rev       -- changeset (any revision identifier) containing the content
#   tombstone -- optional text stored in place of the erased content
#
# Raises util.Abort when an argument is missing or the requested revision
# cannot be censored (see _docensor for the individual conditions).
@command('censor',
    [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
     ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
    _('-r REV [-t TEXT] [FILE]'))
def censor(ui, repo, path, rev='', tombstone='', **opts):
    # Cheap usage checks first, so a malformed invocation never takes a lock.
    if not path:
        raise util.Abort(_('must specify file path to censor'))
    if not rev:
        raise util.Abort(_('must specify revision to censor'))

    # The filelog's index/data files are rewritten wholesale below, and the
    # working directory parents are inspected, so hold both repository locks
    # for the duration. wlock is acquired before lock and released after it.
    wlock = lock = None
    try:
        wlock = repo.wlock()
        lock = repo.lock()
        return _docensor(ui, repo, path, rev, tombstone, **opts)
    finally:
        if lock is not None:
            lock.release()
        if wlock is not None:
            wlock.release()

def _docensor(ui, repo, path, rev='', tombstone='', **opts):
    """Replace one file revision's stored content with a tombstone.

    The caller is expected to hold the repository locks and to have checked
    that ``path`` and ``rev`` are non-empty.

    The filelog for ``path`` is rewritten entry by entry into new index/data
    files: entries before the censored revision are copied verbatim, the
    censored entry is replaced by a padded tombstone flagged
    REVIDX_ISCENSORED, and every later entry gets its data offset fixed up.

    Raises util.Abort if the file has no history, the revision or file
    cannot be found, the file revision is still present in a repository head
    or is a working directory parent, the revlog version is unsupported, or
    the packed tombstone is larger than the data it replaces.
    """
    flog = repo.file(path)
    if not len(flog):
        raise util.Abort(_('cannot censor file with no history'))

    rev = scmutil.revsingle(repo, rev, rev).rev()
    try:
        ctx = repo[rev]
    except KeyError:
        raise util.Abort(_('invalid revision identifier %s') % rev)

    try:
        fctx = ctx.filectx(path)
    except error.LookupError:
        raise util.Abort(_('file does not exist at revision %s') % rev)

    # Refuse to censor a file revision that a head changeset still uses.
    fnode = fctx.filenode()
    headctxs = [repo[c] for c in repo.heads()]
    heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
    if heads:
        headlist = ', '.join([short(c.node()) for c in heads])
        raise util.Abort(_('cannot censor file in heads (%s)') % headlist,
            hint=_('clean/delete and commit first'))

    # Likewise refuse when the changeset is a working directory parent.
    wctx = repo[None]
    wp = wctx.parents()
    if ctx.node() in [p.node() for p in wp]:
        raise util.Abort(_('cannot censor working directory'),
            hint=_('clean/delete/update first'))

    # Low 16 bits of the version word hold the revlog format number.
    flogv = flog.version & 0xFFFF
    if flogv != revlog.REVLOGNG:
        raise util.Abort(
            _('censor does not support revlog version %d') % (flogv,))

    # The stored tombstone is a metadata block with a "censored" key and
    # empty file text.
    tombstone = filelog.packmeta({"censored": tombstone}, "")

    crev = fctx.filerev()

    if len(tombstone) > flog.rawsize(crev):
        raise util.Abort(_(
            'censor tombstone must be no longer than censored data'))

    # Using two files instead of one makes it easy to rewrite entry-by-entry
    # NOTE(review): the write handles are opened with atomictemp=True, so
    # presumably the live revlog files are only replaced on close() -- confirm
    # against the vfs implementation.
    idxread = repo.svfs(flog.indexfile, 'r')
    idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
    if flog.version & revlog.REVLOGNGINLINEDATA:
        # Inline revlog: index and data share one file, so share the handles.
        dataread, datawrite = idxread, idxwrite
    else:
        dataread = repo.svfs(flog.datafile, 'r')
        datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

    # Copy all revlog data up to the entry to be censored.
    rio = revlog.revlogio()
    offset = flog.start(crev)

    for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
        idxwrite.write(chunk)
    for chunk in util.filechunkiter(dataread, limit=offset):
        datawrite.write(chunk)

    def rewriteindex(r, newoffs, newdata=None):
        """Rewrite the index entry with a new data offset and optional new data.

        The newdata argument, if given, is a tuple of three positive integers:
        (new compressed, new uncompressed, added flag bits).
        """
        offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
        flags = revlog.gettype(offlags)
        if newdata:
            comp, uncomp, nflags = newdata
            flags |= nflags
        offlags = revlog.offset_type(newoffs, flags)
        # NOTE(review): the original ``base`` is discarded and ``r`` is
        # written in its place for every entry, including entries whose data
        # is copied verbatim. Confirm that verbatim-copied chunks can never
        # be deltas against another revision.
        e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
        idxwrite.write(rio.packentry(e, None, flog.version, r))
        # Skip the read handle past the index entry that was just replaced.
        idxread.seek(rio.size, 1)

    def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
        """Write the given full text to the filelog with the given data offset.

        Returns:
            The integer number of data bytes written, for tracking data offsets.
        """
        flag, compdata = flog.compress(data)
        newcomp = len(flag) + len(compdata)
        rewriteindex(r, offs, (newcomp, len(data), nflags))
        datawrite.write(flag)
        datawrite.write(compdata)
        # Skip the read handle past the old stored data for this revision.
        dataread.seek(flog.length(r), 1)
        return newcomp

    # Rewrite censored revlog entry with (padded) tombstone data.
    pad = ' ' * (flog.rawsize(crev) - len(tombstone))
    offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

    # Rewrite all following filelog revisions fixing up offsets and deltas.
    for srev in xrange(crev + 1, len(flog)):
        if crev in flog.parentrevs(srev):
            # Immediate children of censored node must be re-added as fulltext.
            try:
                revdata = flog.revision(srev)
            except error.CensoredNodeError as e:
                # The child is itself censored; keep its tombstone.
                revdata = e.tombstone
            dlen = rewrite(srev, offset, revdata)
        else:
            # Copy any other revision data verbatim after fixing up the offset.
            rewriteindex(srev, offset)
            dlen = flog.length(srev)
            for chunk in util.filechunkiter(dataread, limit=dlen):
                datawrite.write(chunk)
        offset += dlen

    idxread.close()
    idxwrite.close()
    # For inline revlogs the data handles are the index handles closed above.
    if dataread is not idxread:
        dataread.close()
        datawrite.close()
@@ -0,0 +1,315 b''
1 $ cat >> $HGRCPATH <<EOF
2 > [extensions]
3 > censor=
4 > EOF
5 $ cp $HGRCPATH $HGRCPATH.orig
6
7 Create repo with unimpeachable content
8
9 $ hg init r
10 $ cd r
11 $ echo 'Initially untainted file' > target
12 $ echo 'Normal file here' > bystander
13 $ hg add target bystander
14 $ hg ci -m init
15
16 Clone repo so we can test pull later
17
18 $ cd ..
19 $ hg clone r rpull
20 updating to branch default
21 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
22 $ cd r
23
24 Introduce content which will ultimately require censorship. Name the first
25 censored node C1, second C2, and so on
26
27 $ echo 'Tainted file' > target
28 $ echo 'Passwords: hunter2' >> target
29 $ hg ci -m taint target
30 $ C1=`hg id --debug -i`
31
32 $ echo 'hunter3' >> target
33 $ echo 'Normal file v2' > bystander
34 $ hg ci -m moretaint target bystander
35 $ C2=`hg id --debug -i`
36
37 Add new sanitized versions to correct our mistake. Name the first head H1,
38 the second head H2, and so on
39
40 $ echo 'Tainted file is now sanitized' > target
41 $ hg ci -m sanitized target
42 $ H1=`hg id --debug -i`
43
44 $ hg update -r $C2
45 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
46 $ echo 'Tainted file now super sanitized' > target
47 $ hg ci -m 'super sanitized' target
48 created new head
49 $ H2=`hg id --debug -i`
50
51 Verify target contents before censorship at each revision
52
53 $ hg cat -r 3 target
54 Tainted file is now sanitized
55 $ hg cat -r $H2 target
56 Tainted file now super sanitized
57 $ hg cat -r $C2 target
58 Tainted file
59 Passwords: hunter2
60 hunter3
61 $ hg cat -r $C1 target
62 Tainted file
63 Passwords: hunter2
64 $ hg cat -r 0 target
65 Initially untainted file
66
67 Try to censor revision with too large of a tombstone message
68
69 $ hg censor -r $C1 -t 'blah blah blah blah blah blah blah blah bla' target
70 abort: censor tombstone must be no longer than censored data
71 [255]
72
73 Censor revision with 2 offenses
74
75 $ hg censor -r $C2 -t "remove password" target
76 $ hg cat -r 3 target
77 Tainted file is now sanitized
78 $ hg cat -r $H2 target
79 Tainted file now super sanitized
80 $ hg cat -r $C2 target
81 abort: censored node: 1e0247a9a4b7
82 (set censor.policy to ignore errors)
83 [255]
84 $ hg cat -r $C1 target
85 Tainted file
86 Passwords: hunter2
87 $ hg cat -r 0 target
88 Initially untainted file
89
90 Censor revision with 1 offense
91
92 $ hg censor -r $C1 target
93 $ hg cat -r 3 target
94 Tainted file is now sanitized
95 $ hg cat -r $H2 target
96 Tainted file now super sanitized
97 $ hg cat -r $C2 target
98 abort: censored node: 1e0247a9a4b7
99 (set censor.policy to ignore errors)
100 [255]
101 $ hg cat -r $C1 target
102 abort: censored node: 613bc869fceb
103 (set censor.policy to ignore errors)
104 [255]
105 $ hg cat -r 0 target
106 Initially untainted file
107
108 Can only checkout target at uncensored revisions, -X is workaround for --all
109
110 $ hg revert -r $C2 target
111 abort: censored node: 1e0247a9a4b7
112 (set censor.policy to ignore errors)
113 [255]
114 $ hg revert -r $C1 target
115 abort: censored node: 613bc869fceb
116 (set censor.policy to ignore errors)
117 [255]
118 $ hg revert -r $C1 --all
119 reverting bystander
120 reverting target
121 abort: censored node: 613bc869fceb
122 (set censor.policy to ignore errors)
123 [255]
124 $ hg revert -r $C1 --all -X target
125 $ cat target
126 Tainted file now super sanitized
127 $ hg revert -r 0 --all
128 reverting target
129 $ cat target
130 Initially untainted file
131 $ hg revert -r $H2 --all
132 reverting bystander
133 reverting target
134 $ cat target
135 Tainted file now super sanitized
136
137 Uncensored file can be viewed at any revision
138
139 $ hg cat -r 3 bystander
140 Normal file v2
141 $ hg cat -r $C2 bystander
142 Normal file v2
143 $ hg cat -r $C1 bystander
144 Normal file here
145 $ hg cat -r 0 bystander
146 Normal file here
147
148 Can update to children of censored revision
149
150 $ hg update -r 3
151 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
152 $ cat target
153 Tainted file is now sanitized
154 $ hg update -r $H2
155 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
156 $ cat target
157 Tainted file now super sanitized
158
159 Set censor policy to abort in trusted $HGRC so hg verify fails
160
161 $ cp $HGRCPATH.orig $HGRCPATH
162 $ cat >> $HGRCPATH <<EOF
163 > [censor]
164 > policy = abort
165 > EOF
166
167 Repo fails verification due to censorship
168
169 $ hg verify
170 checking changesets
171 checking manifests
172 crosschecking files in changesets and manifests
173 checking files
174 target@1: censored file data
175 target@2: censored file data
176 2 files, 5 changesets, 7 total revisions
177 2 integrity errors encountered!
178 (first damaged changeset appears to be 1)
179 [1]
180
181 Cannot update to revision with censored data
182
183 $ hg update -r $C2
184 abort: censored node: 1e0247a9a4b7
185 (set censor.policy to ignore errors)
186 [255]
187 $ hg update -r $C1
188 abort: censored node: 613bc869fceb
189 (set censor.policy to ignore errors)
190 [255]
191 $ hg update -r 0
192 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
193 $ hg update -r $H2
194 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
195
196 Set censor policy to ignore in trusted $HGRC so hg verify passes
197
198 $ cp $HGRCPATH.orig $HGRCPATH
199 $ cat >> $HGRCPATH <<EOF
200 > [censor]
201 > policy = ignore
202 > EOF
203
204 Repo passes verification with warnings with explicit config
205
206 $ hg verify
207 checking changesets
208 checking manifests
209 crosschecking files in changesets and manifests
210 checking files
211 2 files, 5 changesets, 7 total revisions
212
213 May update to revision with censored data with explicit config
214
215 $ hg update -r $C2
216 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
217 $ cat target
218 $ hg update -r $C1
219 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
220 $ cat target
221 $ hg update -r 0
222 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
223 $ cat target
224 Initially untainted file
225 $ hg update -r $H2
226 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
227 $ cat target
228 Tainted file now super sanitized
229
230 Can merge in revision with censored data. Test requires one branch of history
231 with the file censored, but we can't censor at a head, so advance H1.
232
233 $ hg update -r $H1
234 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
235 $ C3=$H1
236 $ echo 'advanced head H1' > target
237 $ hg ci -m 'advance head H1' target
238 $ H1=`hg id --debug -i`
239 $ hg censor -r $C3 target
240 $ hg update -r $H2
241 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
242 $ hg merge -r $C3
243 merging target
244 0 files updated, 1 files merged, 0 files removed, 0 files unresolved
245 (branch merge, don't forget to commit)
246
247 Revisions present in repository heads may not be censored
248
249 $ hg update -C -r $H2
250 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
251 $ hg censor -r $H2 target
252 abort: cannot censor file in heads (78a8fc215e79)
253 (clean/delete and commit first)
254 [255]
255 $ echo 'twiddling thumbs' > bystander
256 $ hg ci -m 'bystander commit'
257 $ H2=`hg id --debug -i`
258 $ hg censor -r "$H2^" target
259 abort: cannot censor file in heads (efbe78065929)
260 (clean/delete and commit first)
261 [255]
262
263 Cannot censor working directory
264
265 $ echo 'seriously no passwords' > target
266 $ hg ci -m 'extend second head arbitrarily' target
267 $ H2=`hg id --debug -i`
268 $ hg update -r "$H2^"
269 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
270 $ hg censor -r . target
271 abort: cannot censor working directory
272 (clean/delete/update first)
273 [255]
274 $ hg update -r $H2
275 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
276
277 Can re-add file after being deleted + censored
278
279 $ C4=$H2
280 $ hg rm target
281 $ hg ci -m 'delete target so it may be censored'
282 $ H2=`hg id --debug -i`
283 $ hg censor -r $C4 target
284 $ hg cat -r $C4 target
285 $ hg cat -r "$H2^^" target
286 Tainted file now super sanitized
287 $ echo 'fresh start' > target
288 $ hg add target
289 $ hg ci -m reincarnated target
290 $ H2=`hg id --debug -i`
291 $ hg cat -r $H2 target
292 fresh start
293 $ hg cat -r "$H2^" target
294 target: no such file in rev 452ec1762369
295 [1]
296 $ hg cat -r $C4 target
297 $ hg cat -r "$H2^^^" target
298 Tainted file now super sanitized
299
300 Can censor after revlog has expanded to no longer permit inline storage
301
302 $ for x in `seq 0 50000`
303 > do
304 > echo "Password: hunter$x" >> target
305 > done
306 $ hg ci -m 'add 100k passwords'
307 $ H2=`hg id --debug -i`
308 $ C5=$H2
309 $ hg revert -r "$H2^" target
310 $ hg ci -m 'cleaned 100k passwords'
311 $ H2=`hg id --debug -i`
312 $ hg censor -r $C5 target
313 $ hg cat -r $C5 target
314 $ hg cat -r $H2 target
315 fresh start
@@ -245,6 +245,7 b' Test extension help:'
245 acl hooks for controlling repository access
245 acl hooks for controlling repository access
246 blackbox log repository events to a blackbox for debugging
246 blackbox log repository events to a blackbox for debugging
247 bugzilla hooks for integrating with the Bugzilla bug tracker
247 bugzilla hooks for integrating with the Bugzilla bug tracker
248 censor erase file content at a given revision
248 churn command to display statistics about repository history
249 churn command to display statistics about repository history
249 color colorize output from some commands
250 color colorize output from some commands
250 convert import revisions from foreign VCS repositories into
251 convert import revisions from foreign VCS repositories into
General Comments 0
You need to be logged in to leave comments. Login now