Show More
@@ -1,189 +1,183 b'' | |||||
1 | # Copyright (C) 2015 - Mike Edgar <adgar@google.com> |
|
1 | # Copyright (C) 2015 - Mike Edgar <adgar@google.com> | |
2 | # |
|
2 | # | |
3 | # This extension enables removal of file content at a given revision, |
|
3 | # This extension enables removal of file content at a given revision, | |
4 | # rewriting the data/metadata of successive revisions to preserve revision log |
|
4 | # rewriting the data/metadata of successive revisions to preserve revision log | |
5 | # integrity. |
|
5 | # integrity. | |
6 |
|
6 | |||
7 | """erase file content at a given revision |
|
7 | """erase file content at a given revision | |
8 |
|
8 | |||
9 | The censor command instructs Mercurial to erase all content of a file at a given |
|
9 | The censor command instructs Mercurial to erase all content of a file at a given | |
10 | revision *without updating the changeset hash.* This allows existing history to |
|
10 | revision *without updating the changeset hash.* This allows existing history to | |
11 | remain valid while preventing future clones/pulls from receiving the erased |
|
11 | remain valid while preventing future clones/pulls from receiving the erased | |
12 | data. |
|
12 | data. | |
13 |
|
13 | |||
14 | Typical uses for censor are due to security or legal requirements, including:: |
|
14 | Typical uses for censor are due to security or legal requirements, including:: | |
15 |
|
15 | |||
16 | * Passwords, private keys, cryptographic material |
|
16 | * Passwords, private keys, cryptographic material | |
17 | * Licensed data/code/libraries for which the license has expired |
|
17 | * Licensed data/code/libraries for which the license has expired | |
18 | * Personally Identifiable Information or other private data |
|
18 | * Personally Identifiable Information or other private data | |
19 |
|
19 | |||
20 | Censored nodes can interrupt mercurial's typical operation whenever the excised |
|
20 | Censored nodes can interrupt mercurial's typical operation whenever the excised | |
21 | data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, |
|
21 | data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, | |
22 | simply fail when asked to produce censored data. Others, like ``hg verify`` and |
|
22 | simply fail when asked to produce censored data. Others, like ``hg verify`` and | |
23 | ``hg update``, must be capable of tolerating censored data to continue to |
|
23 | ``hg update``, must be capable of tolerating censored data to continue to | |
24 | function in a meaningful way. Such commands only tolerate censored file |
|
24 | function in a meaningful way. Such commands only tolerate censored file | |
25 | revisions if they are allowed by the "censor.policy=ignore" config option. |
|
25 | revisions if they are allowed by the "censor.policy=ignore" config option. | |
26 | """ |
|
26 | """ | |
27 |
|
27 | |||
28 | from __future__ import absolute_import |
|
28 | from __future__ import absolute_import | |
29 |
|
29 | |||
30 | from mercurial.i18n import _ |
|
30 | from mercurial.i18n import _ | |
31 | from mercurial.node import short |
|
31 | from mercurial.node import short | |
32 |
|
32 | |||
33 | from mercurial import ( |
|
33 | from mercurial import ( | |
34 | error, |
|
34 | error, | |
35 | lock as lockmod, |
|
|||
36 | registrar, |
|
35 | registrar, | |
37 | revlog, |
|
36 | revlog, | |
38 | scmutil, |
|
37 | scmutil, | |
39 | util, |
|
38 | util, | |
40 | ) |
|
39 | ) | |
41 |
|
40 | |||
42 | cmdtable = {} |
|
41 | cmdtable = {} | |
43 | command = registrar.command(cmdtable) |
|
42 | command = registrar.command(cmdtable) | |
44 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for |
|
43 | # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for | |
45 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should |
|
44 | # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should | |
46 | # be specifying the version(s) of Mercurial they are tested with, or |
|
45 | # be specifying the version(s) of Mercurial they are tested with, or | |
47 | # leave the attribute unspecified. |
|
46 | # leave the attribute unspecified. | |
48 | testedwith = 'ships-with-hg-core' |
|
47 | testedwith = 'ships-with-hg-core' | |
49 |
|
48 | |||
50 | @command('censor', |
|
49 | @command('censor', | |
51 | [('r', 'rev', '', _('censor file from specified revision'), _('REV')), |
|
50 | [('r', 'rev', '', _('censor file from specified revision'), _('REV')), | |
52 | ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))], |
|
51 | ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))], | |
53 | _('-r REV [-t TEXT] [FILE]')) |
|
52 | _('-r REV [-t TEXT] [FILE]')) | |
54 | def censor(ui, repo, path, rev='', tombstone='', **opts): |
|
53 | def censor(ui, repo, path, rev='', tombstone='', **opts): | |
55 | wlock = lock = None |
|
54 | with repo.wlock(), repo.lock(): | |
56 | try: |
|
|||
57 | wlock = repo.wlock() |
|
|||
58 | lock = repo.lock() |
|
|||
59 | return _docensor(ui, repo, path, rev, tombstone, **opts) |
|
55 | return _docensor(ui, repo, path, rev, tombstone, **opts) | |
60 | finally: |
|
|||
61 | lockmod.release(lock, wlock) |
|
|||
62 |
|
56 | |||
63 | def _docensor(ui, repo, path, rev='', tombstone='', **opts): |
|
57 | def _docensor(ui, repo, path, rev='', tombstone='', **opts): | |
64 | if not path: |
|
58 | if not path: | |
65 | raise error.Abort(_('must specify file path to censor')) |
|
59 | raise error.Abort(_('must specify file path to censor')) | |
66 | if not rev: |
|
60 | if not rev: | |
67 | raise error.Abort(_('must specify revision to censor')) |
|
61 | raise error.Abort(_('must specify revision to censor')) | |
68 |
|
62 | |||
69 | wctx = repo[None] |
|
63 | wctx = repo[None] | |
70 |
|
64 | |||
71 | m = scmutil.match(wctx, (path,)) |
|
65 | m = scmutil.match(wctx, (path,)) | |
72 | if m.anypats() or len(m.files()) != 1: |
|
66 | if m.anypats() or len(m.files()) != 1: | |
73 | raise error.Abort(_('can only specify an explicit filename')) |
|
67 | raise error.Abort(_('can only specify an explicit filename')) | |
74 | path = m.files()[0] |
|
68 | path = m.files()[0] | |
75 | flog = repo.file(path) |
|
69 | flog = repo.file(path) | |
76 | if not len(flog): |
|
70 | if not len(flog): | |
77 | raise error.Abort(_('cannot censor file with no history')) |
|
71 | raise error.Abort(_('cannot censor file with no history')) | |
78 |
|
72 | |||
79 | rev = scmutil.revsingle(repo, rev, rev).rev() |
|
73 | rev = scmutil.revsingle(repo, rev, rev).rev() | |
80 | try: |
|
74 | try: | |
81 | ctx = repo[rev] |
|
75 | ctx = repo[rev] | |
82 | except KeyError: |
|
76 | except KeyError: | |
83 | raise error.Abort(_('invalid revision identifier %s') % rev) |
|
77 | raise error.Abort(_('invalid revision identifier %s') % rev) | |
84 |
|
78 | |||
85 | try: |
|
79 | try: | |
86 | fctx = ctx.filectx(path) |
|
80 | fctx = ctx.filectx(path) | |
87 | except error.LookupError: |
|
81 | except error.LookupError: | |
88 | raise error.Abort(_('file does not exist at revision %s') % rev) |
|
82 | raise error.Abort(_('file does not exist at revision %s') % rev) | |
89 |
|
83 | |||
90 | fnode = fctx.filenode() |
|
84 | fnode = fctx.filenode() | |
91 | headctxs = [repo[c] for c in repo.heads()] |
|
85 | headctxs = [repo[c] for c in repo.heads()] | |
92 | heads = [c for c in headctxs if path in c and c.filenode(path) == fnode] |
|
86 | heads = [c for c in headctxs if path in c and c.filenode(path) == fnode] | |
93 | if heads: |
|
87 | if heads: | |
94 | headlist = ', '.join([short(c.node()) for c in heads]) |
|
88 | headlist = ', '.join([short(c.node()) for c in heads]) | |
95 | raise error.Abort(_('cannot censor file in heads (%s)') % headlist, |
|
89 | raise error.Abort(_('cannot censor file in heads (%s)') % headlist, | |
96 | hint=_('clean/delete and commit first')) |
|
90 | hint=_('clean/delete and commit first')) | |
97 |
|
91 | |||
98 | wp = wctx.parents() |
|
92 | wp = wctx.parents() | |
99 | if ctx.node() in [p.node() for p in wp]: |
|
93 | if ctx.node() in [p.node() for p in wp]: | |
100 | raise error.Abort(_('cannot censor working directory'), |
|
94 | raise error.Abort(_('cannot censor working directory'), | |
101 | hint=_('clean/delete/update first')) |
|
95 | hint=_('clean/delete/update first')) | |
102 |
|
96 | |||
103 | flogv = flog.version & 0xFFFF |
|
97 | flogv = flog.version & 0xFFFF | |
104 | if flogv != revlog.REVLOGV1: |
|
98 | if flogv != revlog.REVLOGV1: | |
105 | raise error.Abort( |
|
99 | raise error.Abort( | |
106 | _('censor does not support revlog version %d') % (flogv,)) |
|
100 | _('censor does not support revlog version %d') % (flogv,)) | |
107 |
|
101 | |||
108 | tombstone = revlog.packmeta({"censored": tombstone}, "") |
|
102 | tombstone = revlog.packmeta({"censored": tombstone}, "") | |
109 |
|
103 | |||
110 | crev = fctx.filerev() |
|
104 | crev = fctx.filerev() | |
111 |
|
105 | |||
112 | if len(tombstone) > flog.rawsize(crev): |
|
106 | if len(tombstone) > flog.rawsize(crev): | |
113 | raise error.Abort(_( |
|
107 | raise error.Abort(_( | |
114 | 'censor tombstone must be no longer than censored data')) |
|
108 | 'censor tombstone must be no longer than censored data')) | |
115 |
|
109 | |||
116 | # Using two files instead of one makes it easy to rewrite entry-by-entry |
|
110 | # Using two files instead of one makes it easy to rewrite entry-by-entry | |
117 | idxread = repo.svfs(flog.indexfile, 'r') |
|
111 | idxread = repo.svfs(flog.indexfile, 'r') | |
118 | idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True) |
|
112 | idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True) | |
119 | if flog.version & revlog.FLAG_INLINE_DATA: |
|
113 | if flog.version & revlog.FLAG_INLINE_DATA: | |
120 | dataread, datawrite = idxread, idxwrite |
|
114 | dataread, datawrite = idxread, idxwrite | |
121 | else: |
|
115 | else: | |
122 | dataread = repo.svfs(flog.datafile, 'r') |
|
116 | dataread = repo.svfs(flog.datafile, 'r') | |
123 | datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True) |
|
117 | datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True) | |
124 |
|
118 | |||
125 | # Copy all revlog data up to the entry to be censored. |
|
119 | # Copy all revlog data up to the entry to be censored. | |
126 | rio = revlog.revlogio() |
|
120 | rio = revlog.revlogio() | |
127 | offset = flog.start(crev) |
|
121 | offset = flog.start(crev) | |
128 |
|
122 | |||
129 | for chunk in util.filechunkiter(idxread, limit=crev * rio.size): |
|
123 | for chunk in util.filechunkiter(idxread, limit=crev * rio.size): | |
130 | idxwrite.write(chunk) |
|
124 | idxwrite.write(chunk) | |
131 | for chunk in util.filechunkiter(dataread, limit=offset): |
|
125 | for chunk in util.filechunkiter(dataread, limit=offset): | |
132 | datawrite.write(chunk) |
|
126 | datawrite.write(chunk) | |
133 |
|
127 | |||
134 | def rewriteindex(r, newoffs, newdata=None): |
|
128 | def rewriteindex(r, newoffs, newdata=None): | |
135 | """Rewrite the index entry with a new data offset and optional new data. |
|
129 | """Rewrite the index entry with a new data offset and optional new data. | |
136 |
|
130 | |||
137 | The newdata argument, if given, is a tuple of three positive integers: |
|
131 | The newdata argument, if given, is a tuple of three positive integers: | |
138 | (new compressed, new uncompressed, added flag bits). |
|
132 | (new compressed, new uncompressed, added flag bits). | |
139 | """ |
|
133 | """ | |
140 | offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r] |
|
134 | offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r] | |
141 | flags = revlog.gettype(offlags) |
|
135 | flags = revlog.gettype(offlags) | |
142 | if newdata: |
|
136 | if newdata: | |
143 | comp, uncomp, nflags = newdata |
|
137 | comp, uncomp, nflags = newdata | |
144 | flags |= nflags |
|
138 | flags |= nflags | |
145 | offlags = revlog.offset_type(newoffs, flags) |
|
139 | offlags = revlog.offset_type(newoffs, flags) | |
146 | e = (offlags, comp, uncomp, r, link, p1, p2, nodeid) |
|
140 | e = (offlags, comp, uncomp, r, link, p1, p2, nodeid) | |
147 | idxwrite.write(rio.packentry(e, None, flog.version, r)) |
|
141 | idxwrite.write(rio.packentry(e, None, flog.version, r)) | |
148 | idxread.seek(rio.size, 1) |
|
142 | idxread.seek(rio.size, 1) | |
149 |
|
143 | |||
150 | def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS): |
|
144 | def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS): | |
151 | """Write the given full text to the filelog with the given data offset. |
|
145 | """Write the given full text to the filelog with the given data offset. | |
152 |
|
146 | |||
153 | Returns: |
|
147 | Returns: | |
154 | The integer number of data bytes written, for tracking data offsets. |
|
148 | The integer number of data bytes written, for tracking data offsets. | |
155 | """ |
|
149 | """ | |
156 | flag, compdata = flog.compress(data) |
|
150 | flag, compdata = flog.compress(data) | |
157 | newcomp = len(flag) + len(compdata) |
|
151 | newcomp = len(flag) + len(compdata) | |
158 | rewriteindex(r, offs, (newcomp, len(data), nflags)) |
|
152 | rewriteindex(r, offs, (newcomp, len(data), nflags)) | |
159 | datawrite.write(flag) |
|
153 | datawrite.write(flag) | |
160 | datawrite.write(compdata) |
|
154 | datawrite.write(compdata) | |
161 | dataread.seek(flog.length(r), 1) |
|
155 | dataread.seek(flog.length(r), 1) | |
162 | return newcomp |
|
156 | return newcomp | |
163 |
|
157 | |||
164 | # Rewrite censored revlog entry with (padded) tombstone data. |
|
158 | # Rewrite censored revlog entry with (padded) tombstone data. | |
165 | pad = ' ' * (flog.rawsize(crev) - len(tombstone)) |
|
159 | pad = ' ' * (flog.rawsize(crev) - len(tombstone)) | |
166 | offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED) |
|
160 | offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED) | |
167 |
|
161 | |||
168 | # Rewrite all following filelog revisions fixing up offsets and deltas. |
|
162 | # Rewrite all following filelog revisions fixing up offsets and deltas. | |
169 | for srev in xrange(crev + 1, len(flog)): |
|
163 | for srev in xrange(crev + 1, len(flog)): | |
170 | if crev in flog.parentrevs(srev): |
|
164 | if crev in flog.parentrevs(srev): | |
171 | # Immediate children of censored node must be re-added as fulltext. |
|
165 | # Immediate children of censored node must be re-added as fulltext. | |
172 | try: |
|
166 | try: | |
173 | revdata = flog.revision(srev) |
|
167 | revdata = flog.revision(srev) | |
174 | except error.CensoredNodeError as e: |
|
168 | except error.CensoredNodeError as e: | |
175 | revdata = e.tombstone |
|
169 | revdata = e.tombstone | |
176 | dlen = rewrite(srev, offset, revdata) |
|
170 | dlen = rewrite(srev, offset, revdata) | |
177 | else: |
|
171 | else: | |
178 | # Copy any other revision data verbatim after fixing up the offset. |
|
172 | # Copy any other revision data verbatim after fixing up the offset. | |
179 | rewriteindex(srev, offset) |
|
173 | rewriteindex(srev, offset) | |
180 | dlen = flog.length(srev) |
|
174 | dlen = flog.length(srev) | |
181 | for chunk in util.filechunkiter(dataread, limit=dlen): |
|
175 | for chunk in util.filechunkiter(dataread, limit=dlen): | |
182 | datawrite.write(chunk) |
|
176 | datawrite.write(chunk) | |
183 | offset += dlen |
|
177 | offset += dlen | |
184 |
|
178 | |||
185 | idxread.close() |
|
179 | idxread.close() | |
186 | idxwrite.close() |
|
180 | idxwrite.close() | |
187 | if dataread is not idxread: |
|
181 | if dataread is not idxread: | |
188 | dataread.close() |
|
182 | dataread.close() | |
189 | datawrite.close() |
|
183 | datawrite.close() |
General Comments 0
You need to be logged in to leave comments.
Login now