Show More
@@ -0,0 +1,168 b'' | |||
|
1 | # Copyright (C) 2015 - Mike Edgar <adgar@google.com> | |
|
2 | # | |
|
3 | # This extension enables removal of file content at a given revision, | |
|
4 | # rewriting the data/metadata of successive revisions to preserve revision log | |
|
5 | # integrity. | |
|
6 | ||
|
7 | """erase file content at a given revision | |
|
8 | ||
|
9 | The censor command instructs Mercurial to erase all content of a file at a given | |
|
10 | revision *without updating the changeset hash.* This allows existing history to | |
|
11 | remain valid while preventing future clones/pulls from receiving the erased | |
|
12 | data. | |
|
13 | ||
|
14 | Typical uses for censor are due to security or legal requirements, including:: | |
|
15 | ||
|
16 | * Passwords, private keys, cryptographic material | |
|
17 | * Licensed data/code/libraries for which the license has expired | |
|
18 | * Personally Identifiable Information or other private data | |
|
19 | ||
|
20 | Censored file revisions are listed in a tracked file called .hgcensored stored | |
|
21 | in the repository root. The censor command adds an entry to the .hgcensored file | |
|
22 | in the working directory and commits it (much like ``hg tag`` and .hgtags). The | |
|
23 | censored file data is then replaced with a pointer to the new commit, enabling | |
|
24 | verification. | |
|
25 | ||
|
26 | Censored nodes can interrupt mercurial's typical operation whenever the excised | |
|
27 | data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, | |
|
28 | simply fail when asked to produce censored data. Others, like ``hg verify`` and | |
|
29 | ``hg update``, must be capable of tolerating censored data to continue to | |
|
30 | function in a meaningful way. Such commands only tolerate censored file | |
|
31 | revisions if they are allowed by the policy specified by the "censor.policy" | |
|
32 | config option. | |
|
33 | """ | |
|
34 | ||
|
35 | from mercurial.node import short | |
|
36 | from mercurial import cmdutil, error, filelog, revlog, scmutil, util | |
|
37 | from mercurial.i18n import _ | |
|
38 | ||
|
# Table handed to cmdutil.command(); the resulting ``command`` object is used
# below as the decorator on the ``censor`` function.
cmdtable = {}
command = cmdutil.command(cmdtable)
# NOTE(review): presumably declares which Mercurial versions this extension
# was tested with ('internal' looks like "ships with Mercurial") -- confirm
# against the extension-writing documentation.
testedwith = 'internal'
|
42 | ||
|
@command('censor',
    [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
     ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
    _('-r REV [-t TEXT] [FILE]'))
def censor(ui, repo, path, rev='', tombstone='', **opts):
    # Command entry point. Described with comments rather than a docstring so
    # that no new text gets attached to the registered command function.
    #
    # The actual work replaces the file's revlog index/data files wholesale,
    # so both the working-directory lock and the store lock are held for the
    # whole operation to keep concurrent writers out.
    wlock = lock = None
    try:
        wlock = repo.wlock()
        lock = repo.lock()
        return _docensor(ui, repo, path, rev, tombstone, **opts)
    finally:
        # Release in reverse order of acquisition; either may still be None
        # if acquiring it failed.
        for held in (lock, wlock):
            if held is not None:
                held.release()

def _docensor(ui, repo, path, rev='', tombstone='', **opts):
    """Replace the stored content of ``path`` at ``rev`` with a tombstone.

    The filelog for ``path`` is rewritten entry by entry into fresh
    (atomictemp) index/data files: everything before the censored entry is
    copied verbatim, the censored entry is replaced by space-padded tombstone
    metadata flagged REVIDX_ISCENSORED, and every later entry has its data
    offset fixed up. Direct children of the censored entry are re-added from
    their full text; all other later entries keep their stored data.

    Arguments:
      ui        -- unused here; part of the command signature.
      repo      -- repository whose store is rewritten.
      path      -- file to censor (required).
      rev       -- changeset revision identifier to censor at (required).
      tombstone -- optional replacement text stored in the "censored"
                   metadata field.

    Raises util.Abort when path or rev is missing, the file has no history,
    the file does not exist at the revision, the file revision is still
    present in a repository head or in a working directory parent, the revlog
    version is not REVLOGNG, or the packed tombstone is longer than the data
    being censored.
    """
    if not path:
        raise util.Abort(_('must specify file path to censor'))
    if not rev:
        raise util.Abort(_('must specify revision to censor'))

    flog = repo.file(path)
    if not len(flog):
        raise util.Abort(_('cannot censor file with no history'))

    rev = scmutil.revsingle(repo, rev, rev).rev()
    try:
        ctx = repo[rev]
    except KeyError:
        raise util.Abort(_('invalid revision identifier %s') % rev)

    try:
        fctx = ctx.filectx(path)
    except error.LookupError:
        raise util.Abort(_('file does not exist at revision %s') % rev)

    # Refuse to censor a file revision that any head still carries.
    fnode = fctx.filenode()
    headctxs = [repo[c] for c in repo.heads()]
    heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
    if heads:
        headlist = ', '.join([short(c.node()) for c in heads])
        raise util.Abort(_('cannot censor file in heads (%s)') % headlist,
            hint=_('clean/delete and commit first'))

    # Refuse to censor a changeset the working directory is based on.
    wctx = repo[None]
    wp = wctx.parents()
    if ctx.node() in [p.node() for p in wp]:
        raise util.Abort(_('cannot censor working directory'),
            hint=_('clean/delete/update first'))

    # The low 16 bits of the version word hold the revlog format number.
    flogv = flog.version & 0xFFFF
    if flogv != revlog.REVLOGNG:
        raise util.Abort(
            _('censor does not support revlog version %d') % (flogv,))

    tombstone = filelog.packmeta({"censored": tombstone}, "")

    crev = fctx.filerev()

    # The tombstone is padded up to the original raw size below, so it can
    # never be allowed to exceed it.
    if len(tombstone) > flog.rawsize(crev):
        raise util.Abort(_(
            'censor tombstone must be no longer than censored data'))

    # Using two files instead of one makes it easy to rewrite entry-by-entry
    idxread = repo.svfs(flog.indexfile, 'r')
    idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
    if flog.version & revlog.REVLOGNGINLINEDATA:
        # Inline revlogs keep index and data in the same file.
        dataread, datawrite = idxread, idxwrite
    else:
        dataread = repo.svfs(flog.datafile, 'r')
        datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

    try:
        # Copy all revlog data up to the entry to be censored.
        rio = revlog.revlogio()
        offset = flog.start(crev)

        for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
            idxwrite.write(chunk)
        for chunk in util.filechunkiter(dataread, limit=offset):
            datawrite.write(chunk)

        def rewriteindex(r, newoffs, newdata=None):
            """Rewrite the index entry with a new data offset and optional
            new data.

            The newdata argument, if given, is a tuple of three positive
            integers: (new compressed, new uncompressed, added flag bits).
            """
            offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
            flags = revlog.gettype(offlags)
            if newdata:
                comp, uncomp, nflags = newdata
                flags |= nflags
            offlags = revlog.offset_type(newoffs, flags)
            # NOTE(review): the entry's original ``base`` is dropped and ``r``
            # is written in its place for every rewritten entry, including
            # those whose data the caller copies verbatim -- confirm this is
            # intended.
            e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
            idxwrite.write(rio.packentry(e, None, flog.version, r))
            # Skip the old index entry that was just replaced.
            idxread.seek(rio.size, 1)

        def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
            """Write the given full text to the filelog with the given data
            offset.

            Returns:
                The integer number of data bytes written, for tracking data
                offsets.
            """
            flag, compdata = flog.compress(data)
            newcomp = len(flag) + len(compdata)
            rewriteindex(r, offs, (newcomp, len(data), nflags))
            datawrite.write(flag)
            datawrite.write(compdata)
            # Skip the old stored data for this revision.
            dataread.seek(flog.length(r), 1)
            return newcomp

        # Rewrite censored revlog entry with (padded) tombstone data.
        pad = ' ' * (flog.rawsize(crev) - len(tombstone))
        offset += rewrite(crev, offset, tombstone + pad,
                          revlog.REVIDX_ISCENSORED)

        # Rewrite all following filelog revisions fixing up offsets and
        # deltas.
        for srev in xrange(crev + 1, len(flog)):
            if crev in flog.parentrevs(srev):
                # Immediate children of censored node must be re-added as
                # fulltext.
                try:
                    revdata = flog.revision(srev)
                except error.CensoredNodeError as e:
                    # The child is itself censored: keep its tombstone.
                    revdata = e.tombstone
                dlen = rewrite(srev, offset, revdata)
            else:
                # Copy any other revision data verbatim after fixing up the
                # offset.
                rewriteindex(srev, offset)
                dlen = flog.length(srev)
                for chunk in util.filechunkiter(dataread, limit=dlen):
                    datawrite.write(chunk)
            offset += dlen
    finally:
        # Always release the read handles, even when the rewrite fails.
        idxread.close()
        if dataread is not idxread:
            dataread.close()

    # Only reached on success. The write handles are deliberately not closed
    # on failure: they were opened with atomictemp=True, and closing is
    # presumably what publishes the new content over the real files --
    # confirm against the vfs/atomictempfile documentation.
    idxwrite.close()
    if datawrite is not idxwrite:
        datawrite.close()
@@ -0,0 +1,315 b'' | |||
|
1 | $ cat >> $HGRCPATH <<EOF | |
|
2 | > [extensions] | |
|
3 | > censor= | |
|
4 | > EOF | |
|
5 | $ cp $HGRCPATH $HGRCPATH.orig | |
|
6 | ||
|
7 | Create repo with unimpeachable content | |
|
8 | ||
|
9 | $ hg init r | |
|
10 | $ cd r | |
|
11 | $ echo 'Initially untainted file' > target | |
|
12 | $ echo 'Normal file here' > bystander | |
|
13 | $ hg add target bystander | |
|
14 | $ hg ci -m init | |
|
15 | ||
|
16 | Clone repo so we can test pull later | |
|
17 | ||
|
18 | $ cd .. | |
|
19 | $ hg clone r rpull | |
|
20 | updating to branch default | |
|
21 | 2 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
22 | $ cd r | |
|
23 | ||
|
24 | Introduce content which will ultimately require censorship. Name the first | |
|
25 | censored node C1, second C2, and so on | |
|
26 | ||
|
27 | $ echo 'Tainted file' > target | |
|
28 | $ echo 'Passwords: hunter2' >> target | |
|
29 | $ hg ci -m taint target | |
|
30 | $ C1=`hg id --debug -i` | |
|
31 | ||
|
32 | $ echo 'hunter3' >> target | |
|
33 | $ echo 'Normal file v2' > bystander | |
|
34 | $ hg ci -m moretaint target bystander | |
|
35 | $ C2=`hg id --debug -i` | |
|
36 | ||
|
37 | Add new sanitized versions to correct our mistake. Name the first head H1, | |
|
38 | the second head H2, and so on | |
|
39 | ||
|
40 | $ echo 'Tainted file is now sanitized' > target | |
|
41 | $ hg ci -m sanitized target | |
|
42 | $ H1=`hg id --debug -i` | |
|
43 | ||
|
44 | $ hg update -r $C2 | |
|
45 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
46 | $ echo 'Tainted file now super sanitized' > target | |
|
47 | $ hg ci -m 'super sanitized' target | |
|
48 | created new head | |
|
49 | $ H2=`hg id --debug -i` | |
|
50 | ||
|
51 | Verify target contents before censorship at each revision | |
|
52 | ||
|
53 | $ hg cat -r 3 target | |
|
54 | Tainted file is now sanitized | |
|
55 | $ hg cat -r $H2 target | |
|
56 | Tainted file now super sanitized | |
|
57 | $ hg cat -r $C2 target | |
|
58 | Tainted file | |
|
59 | Passwords: hunter2 | |
|
60 | hunter3 | |
|
61 | $ hg cat -r $C1 target | |
|
62 | Tainted file | |
|
63 | Passwords: hunter2 | |
|
64 | $ hg cat -r 0 target | |
|
65 | Initially untainted file | |
|
66 | ||
|
67 | Try to censor revision with too large of a tombstone message | |
|
68 | ||
|
69 | $ hg censor -r $C1 -t 'blah blah blah blah blah blah blah blah bla' target | |
|
70 | abort: censor tombstone must be no longer than censored data | |
|
71 | [255] | |
|
72 | ||
|
73 | Censor revision with 2 offenses | |
|
74 | ||
|
75 | $ hg censor -r $C2 -t "remove password" target | |
|
76 | $ hg cat -r 3 target | |
|
77 | Tainted file is now sanitized | |
|
78 | $ hg cat -r $H2 target | |
|
79 | Tainted file now super sanitized | |
|
80 | $ hg cat -r $C2 target | |
|
81 | abort: censored node: 1e0247a9a4b7 | |
|
82 | (set censor.policy to ignore errors) | |
|
83 | [255] | |
|
84 | $ hg cat -r $C1 target | |
|
85 | Tainted file | |
|
86 | Passwords: hunter2 | |
|
87 | $ hg cat -r 0 target | |
|
88 | Initially untainted file | |
|
89 | ||
|
90 | Censor revision with 1 offense | |
|
91 | ||
|
92 | $ hg censor -r $C1 target | |
|
93 | $ hg cat -r 3 target | |
|
94 | Tainted file is now sanitized | |
|
95 | $ hg cat -r $H2 target | |
|
96 | Tainted file now super sanitized | |
|
97 | $ hg cat -r $C2 target | |
|
98 | abort: censored node: 1e0247a9a4b7 | |
|
99 | (set censor.policy to ignore errors) | |
|
100 | [255] | |
|
101 | $ hg cat -r $C1 target | |
|
102 | abort: censored node: 613bc869fceb | |
|
103 | (set censor.policy to ignore errors) | |
|
104 | [255] | |
|
105 | $ hg cat -r 0 target | |
|
106 | Initially untainted file | |
|
107 | ||
|
108 | Can only checkout target at uncensored revisions, -X is workaround for --all | |
|
109 | ||
|
110 | $ hg revert -r $C2 target | |
|
111 | abort: censored node: 1e0247a9a4b7 | |
|
112 | (set censor.policy to ignore errors) | |
|
113 | [255] | |
|
114 | $ hg revert -r $C1 target | |
|
115 | abort: censored node: 613bc869fceb | |
|
116 | (set censor.policy to ignore errors) | |
|
117 | [255] | |
|
118 | $ hg revert -r $C1 --all | |
|
119 | reverting bystander | |
|
120 | reverting target | |
|
121 | abort: censored node: 613bc869fceb | |
|
122 | (set censor.policy to ignore errors) | |
|
123 | [255] | |
|
124 | $ hg revert -r $C1 --all -X target | |
|
125 | $ cat target | |
|
126 | Tainted file now super sanitized | |
|
127 | $ hg revert -r 0 --all | |
|
128 | reverting target | |
|
129 | $ cat target | |
|
130 | Initially untainted file | |
|
131 | $ hg revert -r $H2 --all | |
|
132 | reverting bystander | |
|
133 | reverting target | |
|
134 | $ cat target | |
|
135 | Tainted file now super sanitized | |
|
136 | ||
|
137 | Uncensored file can be viewed at any revision | |
|
138 | ||
|
139 | $ hg cat -r 3 bystander | |
|
140 | Normal file v2 | |
|
141 | $ hg cat -r $C2 bystander | |
|
142 | Normal file v2 | |
|
143 | $ hg cat -r $C1 bystander | |
|
144 | Normal file here | |
|
145 | $ hg cat -r 0 bystander | |
|
146 | Normal file here | |
|
147 | ||
|
148 | Can update to children of censored revision | |
|
149 | ||
|
150 | $ hg update -r 3 | |
|
151 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
152 | $ cat target | |
|
153 | Tainted file is now sanitized | |
|
154 | $ hg update -r $H2 | |
|
155 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
156 | $ cat target | |
|
157 | Tainted file now super sanitized | |
|
158 | ||
|
159 | Set censor policy to abort in trusted $HGRC so hg verify fails | |
|
160 | ||
|
161 | $ cp $HGRCPATH.orig $HGRCPATH | |
|
162 | $ cat >> $HGRCPATH <<EOF | |
|
163 | > [censor] | |
|
164 | > policy = abort | |
|
165 | > EOF | |
|
166 | ||
|
167 | Repo fails verification due to censorship | |
|
168 | ||
|
169 | $ hg verify | |
|
170 | checking changesets | |
|
171 | checking manifests | |
|
172 | crosschecking files in changesets and manifests | |
|
173 | checking files | |
|
174 | target@1: censored file data | |
|
175 | target@2: censored file data | |
|
176 | 2 files, 5 changesets, 7 total revisions | |
|
177 | 2 integrity errors encountered! | |
|
178 | (first damaged changeset appears to be 1) | |
|
179 | [1] | |
|
180 | ||
|
181 | Cannot update to revision with censored data | |
|
182 | ||
|
183 | $ hg update -r $C2 | |
|
184 | abort: censored node: 1e0247a9a4b7 | |
|
185 | (set censor.policy to ignore errors) | |
|
186 | [255] | |
|
187 | $ hg update -r $C1 | |
|
188 | abort: censored node: 613bc869fceb | |
|
189 | (set censor.policy to ignore errors) | |
|
190 | [255] | |
|
191 | $ hg update -r 0 | |
|
192 | 2 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
193 | $ hg update -r $H2 | |
|
194 | 2 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
195 | ||
|
196 | Set censor policy to ignore in trusted $HGRC so hg verify passes | |
|
197 | ||
|
198 | $ cp $HGRCPATH.orig $HGRCPATH | |
|
199 | $ cat >> $HGRCPATH <<EOF | |
|
200 | > [censor] | |
|
201 | > policy = ignore | |
|
202 | > EOF | |
|
203 | ||
|
204 | Repo passes verification with warnings with explicit config | |
|
205 | ||
|
206 | $ hg verify | |
|
207 | checking changesets | |
|
208 | checking manifests | |
|
209 | crosschecking files in changesets and manifests | |
|
210 | checking files | |
|
211 | 2 files, 5 changesets, 7 total revisions | |
|
212 | ||
|
213 | May update to revision with censored data with explicit config | |
|
214 | ||
|
215 | $ hg update -r $C2 | |
|
216 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
217 | $ cat target | |
|
218 | $ hg update -r $C1 | |
|
219 | 2 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
220 | $ cat target | |
|
221 | $ hg update -r 0 | |
|
222 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
223 | $ cat target | |
|
224 | Initially untainted file | |
|
225 | $ hg update -r $H2 | |
|
226 | 2 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
227 | $ cat target | |
|
228 | Tainted file now super sanitized | |
|
229 | ||
|
230 | Can merge in revision with censored data. Test requires one branch of history | |
|
231 | with the file censored, but we can't censor at a head, so advance H1. | |
|
232 | ||
|
233 | $ hg update -r $H1 | |
|
234 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
235 | $ C3=$H1 | |
|
236 | $ echo 'advanced head H1' > target | |
|
237 | $ hg ci -m 'advance head H1' target | |
|
238 | $ H1=`hg id --debug -i` | |
|
239 | $ hg censor -r $C3 target | |
|
240 | $ hg update -r $H2 | |
|
241 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
242 | $ hg merge -r $C3 | |
|
243 | merging target | |
|
244 | 0 files updated, 1 files merged, 0 files removed, 0 files unresolved | |
|
245 | (branch merge, don't forget to commit) | |
|
246 | ||
|
247 | Revisions present in repository heads may not be censored | |
|
248 | ||
|
249 | $ hg update -C -r $H2 | |
|
250 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
251 | $ hg censor -r $H2 target | |
|
252 | abort: cannot censor file in heads (78a8fc215e79) | |
|
253 | (clean/delete and commit first) | |
|
254 | [255] | |
|
255 | $ echo 'twiddling thumbs' > bystander | |
|
256 | $ hg ci -m 'bystander commit' | |
|
257 | $ H2=`hg id --debug -i` | |
|
258 | $ hg censor -r "$H2^" target | |
|
259 | abort: cannot censor file in heads (efbe78065929) | |
|
260 | (clean/delete and commit first) | |
|
261 | [255] | |
|
262 | ||
|
263 | Cannot censor working directory | |
|
264 | ||
|
265 | $ echo 'seriously no passwords' > target | |
|
266 | $ hg ci -m 'extend second head arbitrarily' target | |
|
267 | $ H2=`hg id --debug -i` | |
|
268 | $ hg update -r "$H2^" | |
|
269 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
270 | $ hg censor -r . target | |
|
271 | abort: cannot censor working directory | |
|
272 | (clean/delete/update first) | |
|
273 | [255] | |
|
274 | $ hg update -r $H2 | |
|
275 | 1 files updated, 0 files merged, 0 files removed, 0 files unresolved | |
|
276 | ||
|
277 | Can re-add file after being deleted + censored | |
|
278 | ||
|
279 | $ C4=$H2 | |
|
280 | $ hg rm target | |
|
281 | $ hg ci -m 'delete target so it may be censored' | |
|
282 | $ H2=`hg id --debug -i` | |
|
283 | $ hg censor -r $C4 target | |
|
284 | $ hg cat -r $C4 target | |
|
285 | $ hg cat -r "$H2^^" target | |
|
286 | Tainted file now super sanitized | |
|
287 | $ echo 'fresh start' > target | |
|
288 | $ hg add target | |
|
289 | $ hg ci -m reincarnated target | |
|
290 | $ H2=`hg id --debug -i` | |
|
291 | $ hg cat -r $H2 target | |
|
292 | fresh start | |
|
293 | $ hg cat -r "$H2^" target | |
|
294 | target: no such file in rev 452ec1762369 | |
|
295 | [1] | |
|
296 | $ hg cat -r $C4 target | |
|
297 | $ hg cat -r "$H2^^^" target | |
|
298 | Tainted file now super sanitized | |
|
299 | ||
|
300 | Can censor after revlog has expanded to no longer permit inline storage | |
|
301 | ||
|
302 | $ for x in `seq 0 50000` | |
|
303 | > do | |
|
304 | > echo "Password: hunter$x" >> target | |
|
305 | > done | |
|
306 | $ hg ci -m 'add 100k passwords' | |
|
307 | $ H2=`hg id --debug -i` | |
|
308 | $ C5=$H2 | |
|
309 | $ hg revert -r "$H2^" target | |
|
310 | $ hg ci -m 'cleaned 100k passwords' | |
|
311 | $ H2=`hg id --debug -i` | |
|
312 | $ hg censor -r $C5 target | |
|
313 | $ hg cat -r $C5 target | |
|
314 | $ hg cat -r $H2 target | |
|
315 | fresh start |
@@ -245,6 +245,7 b' Test extension help:' | |||
|
245 | 245 | acl hooks for controlling repository access |
|
246 | 246 | blackbox log repository events to a blackbox for debugging |
|
247 | 247 | bugzilla hooks for integrating with the Bugzilla bug tracker |
|
248 | censor erase file content at a given revision | |
|
248 | 249 | churn command to display statistics about repository history |
|
249 | 250 | color colorize output from some commands |
|
250 | 251 | convert import revisions from foreign VCS repositories into |
General Comments 0
You need to be logged in to leave comments.
Login now