##// END OF EJS Templates
censor: make censor acquire locks before processing...
FUJIWARA Katsunori -
r27290:525d9b3f default
parent child Browse files
Show More
@@ -1,170 +1,180 b''
1 1 # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
2 2 #
3 3 # This extension enables removal of file content at a given revision,
4 4 # rewriting the data/metadata of successive revisions to preserve revision log
5 5 # integrity.
6 6
7 7 """erase file content at a given revision
8 8
9 9 The censor command instructs Mercurial to erase all content of a file at a given
10 10 revision *without updating the changeset hash.* This allows existing history to
11 11 remain valid while preventing future clones/pulls from receiving the erased
12 12 data.
13 13
14 14 Typical uses for censor are due to security or legal requirements, including::
15 15
16 16 * Passwords, private keys, cryptographic material
17 17 * Licensed data/code/libraries for which the license has expired
18 18 * Personally Identifiable Information or other private data
19 19
20 20 Censored nodes can interrupt mercurial's typical operation whenever the excised
21 21 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
22 22 simply fail when asked to produce censored data. Others, like ``hg verify`` and
23 23 ``hg update``, must be capable of tolerating censored data to continue to
24 24 function in a meaningful way. Such commands only tolerate censored file
25 25 revisions if they are allowed by the "censor.policy=ignore" config option.
26 26 """
27 27
28 28 from mercurial.node import short
29 29 from mercurial import cmdutil, error, filelog, revlog, scmutil, util
30 30 from mercurial.i18n import _
31 from mercurial import lock as lockmod
31 32
# Table of commands provided by this extension. ``command`` is built from it
# and is applied as a decorator to ``censor`` below (presumably registering
# the decorated function in ``cmdtable`` -- confirm against cmdutil.command).
cmdtable = {}
command = cmdutil.command(cmdtable)
# Note for extension authors: ONLY specify testedwith = 'internal' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'internal'
39 40
@command(
    'censor',
    [
        ('r', 'rev', '', _('censor file from specified revision'), _('REV')),
        ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT')),
    ],
    _('-r REV [-t TEXT] [FILE]'))
def censor(ui, repo, path, rev='', tombstone='', **opts):
    # Entry point for the ``censor`` command: acquire repo.wlock() and then
    # repo.lock(), run the real work in _docensor() while both are held, and
    # hand back whatever _docensor() returns.
    #
    # Both names start as None so that the ``finally`` clause can call
    # lockmod.release() even when one of the acquisitions itself raised.
    wdirlock = None
    repolock = None
    try:
        wdirlock = repo.wlock()
        repolock = repo.lock()
        result = _docensor(ui, repo, path, rev, tombstone, **opts)
    finally:
        # Same argument order as acquisition reversed: lock first, then wlock.
        lockmod.release(repolock, wdirlock)
    return result
53
def _docensor(ui, repo, path, rev='', tombstone='', **opts):
    """Replace the content of ``path`` at ``rev`` with a tombstone.

    The file's revlog is rewritten: everything before the censored entry is
    copied, the censored entry is replaced by padded tombstone data flagged
    with ``revlog.REVIDX_ISCENSORED``, and every later entry is rewritten
    with a fixed-up data offset.

    This function takes no locks itself; ``censor()`` acquires
    ``repo.wlock()`` and ``repo.lock()`` before calling it.

    ``ui`` and ``opts`` are accepted but not used in this function.

    Returns None.

    Raises error.Abort when:

    * ``path`` or ``rev`` is empty
    * ``path`` is a pattern or does not match exactly one file
    * the file has no history
    * the revision cannot be looked up, or the file is not in it
    * a repository head still carries the same file node
    * the revision is a parent of the working directory
    * the filelog is not in the REVLOGNG format
    * the packed tombstone is longer than the data being censored
    """
    if not path:
        raise error.Abort(_('must specify file path to censor'))
    if not rev:
        raise error.Abort(_('must specify revision to censor'))

    wctx = repo[None]

    # Only a single, explicitly named file is accepted (no patterns).
    m = scmutil.match(wctx, (path,))
    if m.anypats() or len(m.files()) != 1:
        raise error.Abort(_('can only specify an explicit filename'))
    path = m.files()[0]
    flog = repo.file(path)
    if not len(flog):
        raise error.Abort(_('cannot censor file with no history'))

    # From here on ``rev`` is whatever .rev() returns for the resolved
    # changeset, no longer the user-supplied string.
    rev = scmutil.revsingle(repo, rev, rev).rev()
    try:
        ctx = repo[rev]
    except KeyError:
        raise error.Abort(_('invalid revision identifier %s') % rev)

    try:
        fctx = ctx.filectx(path)
    except error.LookupError:
        raise error.Abort(_('file does not exist at revision %s') % rev)

    # Refuse if any head still contains this exact file node.
    fnode = fctx.filenode()
    headctxs = [repo[c] for c in repo.heads()]
    heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
    if heads:
        headlist = ', '.join([short(c.node()) for c in heads])
        raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
            hint=_('clean/delete and commit first'))

    # Refuse if the target changeset is a working directory parent.
    wp = wctx.parents()
    if ctx.node() in [p.node() for p in wp]:
        raise error.Abort(_('cannot censor working directory'),
            hint=_('clean/delete/update first'))

    # The low 16 bits of the version field are compared against REVLOGNG;
    # the rewrite below only handles that format.
    flogv = flog.version & 0xFFFF
    if flogv != revlog.REVLOGNG:
        raise error.Abort(
            _('censor does not support revlog version %d') % (flogv,))

    # Wrap the user's tombstone text in filelog metadata under the
    # "censored" key, with an empty body.
    tombstone = filelog.packmeta({"censored": tombstone}, "")

    crev = fctx.filerev()

    # The tombstone is later padded up to the original raw size, so it must
    # not be longer than the data it replaces.
    if len(tombstone) > flog.rawsize(crev):
        raise error.Abort(_(
            'censor tombstone must be no longer than censored data'))

    # Using two files instead of one makes it easy to rewrite entry-by-entry
    #
    # NOTE(review): none of the handles opened below are closed (or, for the
    # atomictemp ones, discarded) if an exception is raised before the
    # close() calls at the end of this function -- confirm that is acceptable.
    idxread = repo.svfs(flog.indexfile, 'r')
    idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
    if flog.version & revlog.REVLOGNGINLINEDATA:
        # Inline flag set: the data handles are the index handles themselves.
        dataread, datawrite = idxread, idxwrite
    else:
        dataread = repo.svfs(flog.datafile, 'r')
        datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

    # Copy all revlog data up to the entry to be censored.
    rio = revlog.revlogio()
    offset = flog.start(crev)

    for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
        idxwrite.write(chunk)
    for chunk in util.filechunkiter(dataread, limit=offset):
        datawrite.write(chunk)

    def rewriteindex(r, newoffs, newdata=None):
        """Rewrite the index entry with a new data offset and optional new data.

        The newdata argument, if given, is a tuple of three positive integers:
        (new compressed, new uncompressed, added flag bits).
        """
        offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
        flags = revlog.gettype(offlags)
        if newdata:
            comp, uncomp, nflags = newdata
            flags |= nflags
        offlags = revlog.offset_type(newoffs, flags)
        # NOTE(review): the unpacked ``base`` is not used; ``r`` is written
        # in the slot it was unpacked from, for every rewritten entry
        # (including those whose data is copied verbatim) -- confirm this is
        # intended.
        e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
        idxwrite.write(rio.packentry(e, None, flog.version, r))
        # Skip the old index entry on the read side to stay in step with
        # what was just written.
        idxread.seek(rio.size, 1)

    def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
        """Write the given full text to the filelog with the given data offset.

        Returns:
            The integer number of data bytes written, for tracking data offsets.
        """
        flag, compdata = flog.compress(data)
        newcomp = len(flag) + len(compdata)
        rewriteindex(r, offs, (newcomp, len(data), nflags))
        datawrite.write(flag)
        datawrite.write(compdata)
        # Skip the old stored data for this revision on the read side.
        dataread.seek(flog.length(r), 1)
        return newcomp

    # Rewrite censored revlog entry with (padded) tombstone data.
    pad = ' ' * (flog.rawsize(crev) - len(tombstone))
    offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

    # Rewrite all following filelog revisions fixing up offsets and deltas.
    for srev in xrange(crev + 1, len(flog)):
        if crev in flog.parentrevs(srev):
            # Immediate children of censored node must be re-added as fulltext.
            try:
                revdata = flog.revision(srev)
            except error.CensoredNodeError as e:
                # Fall back to the tombstone carried by the exception.
                revdata = e.tombstone
            dlen = rewrite(srev, offset, revdata)
        else:
            # Copy any other revision data verbatim after fixing up the offset.
            rewriteindex(srev, offset)
            dlen = flog.length(srev)
            for chunk in util.filechunkiter(dataread, limit=dlen):
                datawrite.write(chunk)
        # Advance the running output offset by what this revision occupies.
        offset += dlen

    idxread.close()
    idxwrite.close()
    # With inline data the data handles alias the index handles, which were
    # closed just above.
    if dataread is not idxread:
        dataread.close()
        datawrite.close()
General Comments 0
You need to be logged in to leave comments. Login now