##// END OF EJS Templates
censor: make censor acquire locks before processing...
FUJIWARA Katsunori -
r27290:525d9b3f default
parent child Browse files
Show More
@@ -1,170 +1,180 b''
1 # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
1 # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
2 #
2 #
3 # This extension enables removal of file content at a given revision,
3 # This extension enables removal of file content at a given revision,
4 # rewriting the data/metadata of successive revisions to preserve revision log
4 # rewriting the data/metadata of successive revisions to preserve revision log
5 # integrity.
5 # integrity.
6
6
7 """erase file content at a given revision
7 """erase file content at a given revision
8
8
9 The censor command instructs Mercurial to erase all content of a file at a given
9 The censor command instructs Mercurial to erase all content of a file at a given
10 revision *without updating the changeset hash.* This allows existing history to
10 revision *without updating the changeset hash.* This allows existing history to
11 remain valid while preventing future clones/pulls from receiving the erased
11 remain valid while preventing future clones/pulls from receiving the erased
12 data.
12 data.
13
13
14 Typical uses for censor are due to security or legal requirements, including::
14 Typical uses for censor are due to security or legal requirements, including::
15
15
16 * Passwords, private keys, cryptographic material
16 * Passwords, private keys, cryptographic material
17 * Licensed data/code/libraries for which the license has expired
17 * Licensed data/code/libraries for which the license has expired
18 * Personally Identifiable Information or other private data
18 * Personally Identifiable Information or other private data
19
19
20 Censored nodes can interrupt mercurial's typical operation whenever the excised
20 Censored nodes can interrupt mercurial's typical operation whenever the excised
21 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
21 data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
22 simply fail when asked to produce censored data. Others, like ``hg verify`` and
22 simply fail when asked to produce censored data. Others, like ``hg verify`` and
23 ``hg update``, must be capable of tolerating censored data to continue to
23 ``hg update``, must be capable of tolerating censored data to continue to
24 function in a meaningful way. Such commands only tolerate censored file
24 function in a meaningful way. Such commands only tolerate censored file
25 revisions if they are allowed by the "censor.policy=ignore" config option.
25 revisions if they are allowed by the "censor.policy=ignore" config option.
26 """
26 """
27
27
28 from mercurial.node import short
28 from mercurial.node import short
29 from mercurial import cmdutil, error, filelog, revlog, scmutil, util
29 from mercurial import cmdutil, error, filelog, revlog, scmutil, util
30 from mercurial.i18n import _
30 from mercurial.i18n import _
31 from mercurial import lock as lockmod
31
32
32 cmdtable = {}
33 cmdtable = {}
33 command = cmdutil.command(cmdtable)
34 command = cmdutil.command(cmdtable)
34 # Note for extension authors: ONLY specify testedwith = 'internal' for
35 # Note for extension authors: ONLY specify testedwith = 'internal' for
35 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
36 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
36 # be specifying the version(s) of Mercurial they are tested with, or
37 # be specifying the version(s) of Mercurial they are tested with, or
37 # leave the attribute unspecified.
38 # leave the attribute unspecified.
38 testedwith = 'internal'
39 testedwith = 'internal'
39
40
40 @command('censor',
41 @command('censor',
41 [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
42 [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
42 ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
43 ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
43 _('-r REV [-t TEXT] [FILE]'))
44 _('-r REV [-t TEXT] [FILE]'))
44 def censor(ui, repo, path, rev='', tombstone='', **opts):
45 def censor(ui, repo, path, rev='', tombstone='', **opts):
46 wlock = lock = None
47 try:
48 wlock = repo.wlock()
49 lock = repo.lock()
50 return _docensor(ui, repo, path, rev, tombstone, **opts)
51 finally:
52 lockmod.release(lock, wlock)
53
54 def _docensor(ui, repo, path, rev='', tombstone='', **opts):
45 if not path:
55 if not path:
46 raise error.Abort(_('must specify file path to censor'))
56 raise error.Abort(_('must specify file path to censor'))
47 if not rev:
57 if not rev:
48 raise error.Abort(_('must specify revision to censor'))
58 raise error.Abort(_('must specify revision to censor'))
49
59
50 wctx = repo[None]
60 wctx = repo[None]
51
61
52 m = scmutil.match(wctx, (path,))
62 m = scmutil.match(wctx, (path,))
53 if m.anypats() or len(m.files()) != 1:
63 if m.anypats() or len(m.files()) != 1:
54 raise error.Abort(_('can only specify an explicit filename'))
64 raise error.Abort(_('can only specify an explicit filename'))
55 path = m.files()[0]
65 path = m.files()[0]
56 flog = repo.file(path)
66 flog = repo.file(path)
57 if not len(flog):
67 if not len(flog):
58 raise error.Abort(_('cannot censor file with no history'))
68 raise error.Abort(_('cannot censor file with no history'))
59
69
60 rev = scmutil.revsingle(repo, rev, rev).rev()
70 rev = scmutil.revsingle(repo, rev, rev).rev()
61 try:
71 try:
62 ctx = repo[rev]
72 ctx = repo[rev]
63 except KeyError:
73 except KeyError:
64 raise error.Abort(_('invalid revision identifier %s') % rev)
74 raise error.Abort(_('invalid revision identifier %s') % rev)
65
75
66 try:
76 try:
67 fctx = ctx.filectx(path)
77 fctx = ctx.filectx(path)
68 except error.LookupError:
78 except error.LookupError:
69 raise error.Abort(_('file does not exist at revision %s') % rev)
79 raise error.Abort(_('file does not exist at revision %s') % rev)
70
80
71 fnode = fctx.filenode()
81 fnode = fctx.filenode()
72 headctxs = [repo[c] for c in repo.heads()]
82 headctxs = [repo[c] for c in repo.heads()]
73 heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
83 heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
74 if heads:
84 if heads:
75 headlist = ', '.join([short(c.node()) for c in heads])
85 headlist = ', '.join([short(c.node()) for c in heads])
76 raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
86 raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
77 hint=_('clean/delete and commit first'))
87 hint=_('clean/delete and commit first'))
78
88
79 wp = wctx.parents()
89 wp = wctx.parents()
80 if ctx.node() in [p.node() for p in wp]:
90 if ctx.node() in [p.node() for p in wp]:
81 raise error.Abort(_('cannot censor working directory'),
91 raise error.Abort(_('cannot censor working directory'),
82 hint=_('clean/delete/update first'))
92 hint=_('clean/delete/update first'))
83
93
84 flogv = flog.version & 0xFFFF
94 flogv = flog.version & 0xFFFF
85 if flogv != revlog.REVLOGNG:
95 if flogv != revlog.REVLOGNG:
86 raise error.Abort(
96 raise error.Abort(
87 _('censor does not support revlog version %d') % (flogv,))
97 _('censor does not support revlog version %d') % (flogv,))
88
98
89 tombstone = filelog.packmeta({"censored": tombstone}, "")
99 tombstone = filelog.packmeta({"censored": tombstone}, "")
90
100
91 crev = fctx.filerev()
101 crev = fctx.filerev()
92
102
93 if len(tombstone) > flog.rawsize(crev):
103 if len(tombstone) > flog.rawsize(crev):
94 raise error.Abort(_(
104 raise error.Abort(_(
95 'censor tombstone must be no longer than censored data'))
105 'censor tombstone must be no longer than censored data'))
96
106
97 # Using two files instead of one makes it easy to rewrite entry-by-entry
107 # Using two files instead of one makes it easy to rewrite entry-by-entry
98 idxread = repo.svfs(flog.indexfile, 'r')
108 idxread = repo.svfs(flog.indexfile, 'r')
99 idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
109 idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
100 if flog.version & revlog.REVLOGNGINLINEDATA:
110 if flog.version & revlog.REVLOGNGINLINEDATA:
101 dataread, datawrite = idxread, idxwrite
111 dataread, datawrite = idxread, idxwrite
102 else:
112 else:
103 dataread = repo.svfs(flog.datafile, 'r')
113 dataread = repo.svfs(flog.datafile, 'r')
104 datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
114 datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
105
115
106 # Copy all revlog data up to the entry to be censored.
116 # Copy all revlog data up to the entry to be censored.
107 rio = revlog.revlogio()
117 rio = revlog.revlogio()
108 offset = flog.start(crev)
118 offset = flog.start(crev)
109
119
110 for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
120 for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
111 idxwrite.write(chunk)
121 idxwrite.write(chunk)
112 for chunk in util.filechunkiter(dataread, limit=offset):
122 for chunk in util.filechunkiter(dataread, limit=offset):
113 datawrite.write(chunk)
123 datawrite.write(chunk)
114
124
115 def rewriteindex(r, newoffs, newdata=None):
125 def rewriteindex(r, newoffs, newdata=None):
116 """Rewrite the index entry with a new data offset and optional new data.
126 """Rewrite the index entry with a new data offset and optional new data.
117
127
118 The newdata argument, if given, is a tuple of three positive integers:
128 The newdata argument, if given, is a tuple of three positive integers:
119 (new compressed, new uncompressed, added flag bits).
129 (new compressed, new uncompressed, added flag bits).
120 """
130 """
121 offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
131 offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
122 flags = revlog.gettype(offlags)
132 flags = revlog.gettype(offlags)
123 if newdata:
133 if newdata:
124 comp, uncomp, nflags = newdata
134 comp, uncomp, nflags = newdata
125 flags |= nflags
135 flags |= nflags
126 offlags = revlog.offset_type(newoffs, flags)
136 offlags = revlog.offset_type(newoffs, flags)
127 e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
137 e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
128 idxwrite.write(rio.packentry(e, None, flog.version, r))
138 idxwrite.write(rio.packentry(e, None, flog.version, r))
129 idxread.seek(rio.size, 1)
139 idxread.seek(rio.size, 1)
130
140
131 def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
141 def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
132 """Write the given full text to the filelog with the given data offset.
142 """Write the given full text to the filelog with the given data offset.
133
143
134 Returns:
144 Returns:
135 The integer number of data bytes written, for tracking data offsets.
145 The integer number of data bytes written, for tracking data offsets.
136 """
146 """
137 flag, compdata = flog.compress(data)
147 flag, compdata = flog.compress(data)
138 newcomp = len(flag) + len(compdata)
148 newcomp = len(flag) + len(compdata)
139 rewriteindex(r, offs, (newcomp, len(data), nflags))
149 rewriteindex(r, offs, (newcomp, len(data), nflags))
140 datawrite.write(flag)
150 datawrite.write(flag)
141 datawrite.write(compdata)
151 datawrite.write(compdata)
142 dataread.seek(flog.length(r), 1)
152 dataread.seek(flog.length(r), 1)
143 return newcomp
153 return newcomp
144
154
145 # Rewrite censored revlog entry with (padded) tombstone data.
155 # Rewrite censored revlog entry with (padded) tombstone data.
146 pad = ' ' * (flog.rawsize(crev) - len(tombstone))
156 pad = ' ' * (flog.rawsize(crev) - len(tombstone))
147 offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
157 offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
148
158
149 # Rewrite all following filelog revisions fixing up offsets and deltas.
159 # Rewrite all following filelog revisions fixing up offsets and deltas.
150 for srev in xrange(crev + 1, len(flog)):
160 for srev in xrange(crev + 1, len(flog)):
151 if crev in flog.parentrevs(srev):
161 if crev in flog.parentrevs(srev):
152 # Immediate children of censored node must be re-added as fulltext.
162 # Immediate children of censored node must be re-added as fulltext.
153 try:
163 try:
154 revdata = flog.revision(srev)
164 revdata = flog.revision(srev)
155 except error.CensoredNodeError as e:
165 except error.CensoredNodeError as e:
156 revdata = e.tombstone
166 revdata = e.tombstone
157 dlen = rewrite(srev, offset, revdata)
167 dlen = rewrite(srev, offset, revdata)
158 else:
168 else:
159 # Copy any other revision data verbatim after fixing up the offset.
169 # Copy any other revision data verbatim after fixing up the offset.
160 rewriteindex(srev, offset)
170 rewriteindex(srev, offset)
161 dlen = flog.length(srev)
171 dlen = flog.length(srev)
162 for chunk in util.filechunkiter(dataread, limit=dlen):
172 for chunk in util.filechunkiter(dataread, limit=dlen):
163 datawrite.write(chunk)
173 datawrite.write(chunk)
164 offset += dlen
174 offset += dlen
165
175
166 idxread.close()
176 idxread.close()
167 idxwrite.close()
177 idxwrite.close()
168 if dataread is not idxread:
178 if dataread is not idxread:
169 dataread.close()
179 dataread.close()
170 datawrite.close()
180 datawrite.close()
General Comments 0
You need to be logged in to leave comments. Log in now