Show More
@@ -1,162 +1,161 b'' | |||
|
1 | 1 | # Copyright (C) 2015 - Mike Edgar <adgar@google.com> |
|
2 | 2 | # |
|
3 | 3 | # This extension enables removal of file content at a given revision, |
|
4 | 4 | # rewriting the data/metadata of successive revisions to preserve revision log |
|
5 | 5 | # integrity. |
|
6 | 6 | |
|
7 | 7 | """erase file content at a given revision |
|
8 | 8 | |
|
9 | 9 | The censor command instructs Mercurial to erase all content of a file at a given |
|
10 | 10 | revision *without updating the changeset hash.* This allows existing history to |
|
11 | 11 | remain valid while preventing future clones/pulls from receiving the erased |
|
12 | 12 | data. |
|
13 | 13 | |
|
14 | 14 | Typical uses for censor are due to security or legal requirements, including:: |
|
15 | 15 | |
|
16 | 16 | * Passwords, private keys, cryptographic material |
|
17 | 17 | * Licensed data/code/libraries for which the license has expired |
|
18 | 18 | * Personally Identifiable Information or other private data |
|
19 | 19 | |
|
20 | 20 | Censored nodes can interrupt mercurial's typical operation whenever the excised |
|
21 | 21 | data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, |
|
22 | 22 | simply fail when asked to produce censored data. Others, like ``hg verify`` and |
|
23 | 23 | ``hg update``, must be capable of tolerating censored data to continue to |
|
24 | 24 | function in a meaningful way. Such commands only tolerate censored file |
|
25 |
revisions if they are allowed by the policy |
|
|
26 | config option. | |
|
25 | revisions if they are allowed by the "censor.policy=ignore" config option. | |
|
27 | 26 | """ |
|
28 | 27 | |
|
29 | 28 | from mercurial.node import short |
|
30 | 29 | from mercurial import cmdutil, error, filelog, revlog, scmutil, util |
|
31 | 30 | from mercurial.i18n import _ |
|
32 | 31 | |
|
# Extension boilerplate: cmdtable is the dict handed to cmdutil.command() to
# build the ``command`` decorator, which is applied to censor() below.
33 | 32 | cmdtable = {} |

34 | 33 | command = cmdutil.command(cmdtable) |

# NOTE(review): presumably marks the extension as tested only against the
# Mercurial version it ships with -- confirm against the extension docs.
35 | 34 | testedwith = 'internal' |
|
36 | 35 | |
|
# censor() is the command entry point.  It is documented with comments rather
# than a docstring on purpose: adding a docstring to a command function may
# change the help text shown to users (NOTE(review): confirm).
@command('censor',
    [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
     ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
    _('-r REV [-t TEXT] [FILE]'))
def censor(ui, repo, path, rev='', tombstone='', **opts):
    # The rewrite below replaces the filelog's index/data files in the store
    # and also inspects the working directory parents, so hold both the
    # working-directory lock and the store lock for the whole operation.
    # wlock is taken before lock, and they are released in reverse order.
    wlock = lock = None
    try:
        wlock = repo.wlock()
        lock = repo.lock()
        return _docensor(ui, repo, path, rev, tombstone, **opts)
    finally:
        for held in (lock, wlock):
            if held is not None:
                held.release()

def _docensor(ui, repo, path, rev='', tombstone='', **opts):
    """Replace the content of ``path`` at ``rev`` with a tombstone.

    The caller is expected to hold the repository locks.  The filelog for
    ``path`` is rewritten entry by entry into new index/data files: entries
    before the censored one are copied unchanged, the censored entry is
    replaced by tombstone data padded to the original raw size, immediate
    children of the censored entry are re-added as full text, and every
    other later entry is copied with its data offset fixed up.

    Raises util.Abort when:
      * no path or no revision is given,
      * the file has no history or does not exist at the revision,
      * the file revision to censor is still present in a repository head,
      * the revision is a parent of the working directory,
      * the filelog is not a REVLOGNG revlog,
      * the packed tombstone is longer than the data it replaces.
    """
    if not path:
        raise util.Abort(_('must specify file path to censor'))
    if not rev:
        raise util.Abort(_('must specify revision to censor'))

    flog = repo.file(path)
    if not len(flog):
        raise util.Abort(_('cannot censor file with no history'))

    # Resolve the user-supplied revision to a changeset revision number.
    rev = scmutil.revsingle(repo, rev, rev).rev()
    try:
        ctx = repo[rev]
    except KeyError:
        raise util.Abort(_('invalid revision identifier %s') % rev)

    try:
        fctx = ctx.filectx(path)
    except error.LookupError:
        raise util.Abort(_('file does not exist at revision %s') % rev)

    # Refuse to censor a file revision that any head still contains.
    fnode = fctx.filenode()
    headctxs = [repo[c] for c in repo.heads()]
    heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
    if heads:
        headlist = ', '.join([short(c.node()) for c in heads])
        raise util.Abort(_('cannot censor file in heads (%s)') % headlist,
            hint=_('clean/delete and commit first'))

    # Refuse to censor a changeset the working directory is based on.
    wctx = repo[None]
    wp = wctx.parents()
    if ctx.node() in [p.node() for p in wp]:
        raise util.Abort(_('cannot censor working directory'),
            hint=_('clean/delete/update first'))

    # Only the low 16 bits of the version field are compared; the remaining
    # bits are tested separately below (REVLOGNGINLINEDATA).
    flogv = flog.version & 0xFFFF
    if flogv != revlog.REVLOGNG:
        raise util.Abort(
            _('censor does not support revlog version %d') % (flogv,))

    # The tombstone is stored as filelog metadata under the "censored" key
    # with an empty text body.
    tombstone = filelog.packmeta({"censored": tombstone}, "")

    crev = fctx.filerev()

    if len(tombstone) > flog.rawsize(crev):
        raise util.Abort(_(
            'censor tombstone must be no longer than censored data'))

    # Using two files instead of one makes it easy to rewrite entry-by-entry
    # The writers are opened with atomictemp=True (NOTE(review): presumably
    # the original files are only replaced when the writers are closed).
    idxread = repo.svfs(flog.indexfile, 'r')
    idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
    if flog.version & revlog.REVLOGNGINLINEDATA:
        # Inline revlog: index and data share one file, so share the handles.
        dataread, datawrite = idxread, idxwrite
    else:
        dataread = repo.svfs(flog.datafile, 'r')
        datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

    # Copy all revlog data up to the entry to be censored.
    rio = revlog.revlogio()
    offset = flog.start(crev)

    for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
        idxwrite.write(chunk)
    for chunk in util.filechunkiter(dataread, limit=offset):
        datawrite.write(chunk)

    def rewriteindex(r, newoffs, newdata=None):
        """Rewrite the index entry with a new data offset and optional new data.

        The newdata argument, if given, is a tuple of three positive integers:
        (new compressed, new uncompressed, added flag bits).
        """
        offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
        flags = revlog.gettype(offlags)
        if newdata:
            comp, uncomp, nflags = newdata
            flags |= nflags
        offlags = revlog.offset_type(newoffs, flags)
        # NOTE(review): the base field is written as ``r`` for every entry,
        # including entries whose data is copied verbatim by the caller.
        # Confirm this is intended for entries that were stored as deltas.
        e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
        idxwrite.write(rio.packentry(e, None, flog.version, r))
        # Step the reader past the old index entry that was just replaced.
        idxread.seek(rio.size, 1)

    def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
        """Write the given full text to the filelog with the given data offset.

        Returns:
            The integer number of data bytes written, for tracking data offsets.
        """
        flag, compdata = flog.compress(data)
        newcomp = len(flag) + len(compdata)
        rewriteindex(r, offs, (newcomp, len(data), nflags))
        datawrite.write(flag)
        datawrite.write(compdata)
        # Step the reader past the old stored data for this revision.
        dataread.seek(flog.length(r), 1)
        return newcomp

    # Rewrite censored revlog entry with (padded) tombstone data.
    pad = ' ' * (flog.rawsize(crev) - len(tombstone))
    offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

    # Rewrite all following filelog revisions fixing up offsets and deltas.
    for srev in xrange(crev + 1, len(flog)):
        if crev in flog.parentrevs(srev):
            # Immediate children of censored node must be re-added as fulltext.
            try:
                revdata = flog.revision(srev)
            except error.CensoredNodeError as e:
                # The child is itself censored: keep its tombstone as data.
                revdata = e.tombstone
            dlen = rewrite(srev, offset, revdata)
        else:
            # Copy any other revision data verbatim after fixing up the offset.
            rewriteindex(srev, offset)
            dlen = flog.length(srev)
            for chunk in util.filechunkiter(dataread, limit=dlen):
                datawrite.write(chunk)
        offset += dlen

    idxread.close()
    idxwrite.close()
    if dataread is not idxread:
        # Separate data file: it has its own pair of handles to close.
        dataread.close()
        datawrite.close()
General Comments 0
You need to be logged in to leave comments.
Login now