upstream/mercurial-mirror Commit - r39697:8bfbb258

1

2

#

2

#

3

# This extension enables removal of file content at a given revision,

3

# This extension enables removal of file content at a given revision,

4

# rewriting the data/metadata of successive revisions to preserve revision log

4

# rewriting the data/metadata of successive revisions to preserve revision log

5

# integrity.

5

# integrity.

6

7

"""erase file content at a given revision

7

"""erase file content at a given revision

8

9

The censor command instructs Mercurial to erase all content of a file at a given

9

The censor command instructs Mercurial to erase all content of a file at a given

10

revision *without updating the changeset hash.* This allows existing history to

10

revision *without updating the changeset hash.* This allows existing history to

11

remain valid while preventing future clones/pulls from receiving the erased

11

remain valid while preventing future clones/pulls from receiving the erased

12

data.

12

data.

13

14

Typical uses for censor are due to security or legal requirements, including::

14

Typical uses for censor are due to security or legal requirements, including::

15

16

* Passwords, private keys, cryptographic material

16

* Passwords, private keys, cryptographic material

17

* Licensed data/code/libraries for which the license has expired

17

* Licensed data/code/libraries for which the license has expired

18

* Personally Identifiable Information or other private data

18

* Personally Identifiable Information or other private data

19

20

Censored nodes can interrupt mercurial's typical operation whenever the excised

20

Censored nodes can interrupt mercurial's typical operation whenever the excised

21

data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,

21

data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,

22

simply fail when asked to produce censored data. Others, like ``hg verify`` and

22

simply fail when asked to produce censored data. Others, like ``hg verify`` and

23

``hg update``, must be capable of tolerating censored data to continue to

23

``hg update``, must be capable of tolerating censored data to continue to

24

function in a meaningful way. Such commands only tolerate censored file

24

function in a meaningful way. Such commands only tolerate censored file

25

revisions if they are allowed by the "censor.policy=ignore" config option.

25

revisions if they are allowed by the "censor.policy=ignore" config option.

26

"""

26

"""

27

28

from __future__ import absolute_import

28

from __future__ import absolute_import

29

30

from mercurial.i18n import _

30

from mercurial.i18n import _

31

from mercurial.node import short

31

from mercurial.node import short

32

33

from mercurial import (

33

from mercurial import (

34

error,

34

error,

35

pycompat,

35

pycompat,

36

registrar,

36

registrar,

37

revlog,

37

revlog,

38

scmutil,

38

scmutil,

39

util,

39

util,

40

)

40

)

41

42

cmdtable = {}

42

cmdtable = {}

43

command = registrar.command(cmdtable)

43

command = registrar.command(cmdtable)

44

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for

44

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for

45

# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should

45

# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should

46

# be specifying the version(s) of Mercurial they are tested with, or

46

# be specifying the version(s) of Mercurial they are tested with, or

47

# leave the attribute unspecified.

47

# leave the attribute unspecified.

48

testedwith = 'ships-with-hg-core'

48

testedwith = 'ships-with-hg-core'

49

50

@command('censor',

50

@command('censor',

51

[('r', 'rev', '', _('censor file from specified revision'), _('REV')),

51

[('r', 'rev', '', _('censor file from specified revision'), _('REV')),

52

('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],

52

('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],

53

_('-r REV [-t TEXT] [FILE]'))

53

_('-r REV [-t TEXT] [FILE]'))

54

def censor(ui, repo, path, rev='', tombstone='', **opts):

54

def censor(ui, repo, path, rev='', tombstone='', **opts):

55

with repo.wlock(), repo.lock():

55

with repo.wlock(), repo.lock():

56

return _docensor(ui, repo, path, rev, tombstone, **opts)

56

return _docensor(ui, repo, path, rev, tombstone, **opts)

57

58

def _docensor(ui, repo, path, rev='', tombstone='', **opts):

58

def _docensor(ui, repo, path, rev='', tombstone='', **opts):

59

if not path:

59

if not path:

60

raise error.Abort(_('must specify file path to censor'))

60

raise error.Abort(_('must specify file path to censor'))

61

if not rev:

61

if not rev:

62

raise error.Abort(_('must specify revision to censor'))

62

raise error.Abort(_('must specify revision to censor'))

63

64

wctx = repo[None]

64

wctx = repo[None]

65

66

m = scmutil.match(wctx, (path,))

66

m = scmutil.match(wctx, (path,))

67

if m.anypats() or len(m.files()) != 1:

67

if m.anypats() or len(m.files()) != 1:

68

raise error.Abort(_('can only specify an explicit filename'))

68

raise error.Abort(_('can only specify an explicit filename'))

69

path = m.files()[0]

69

path = m.files()[0]

70

flog = repo.file(path)

70

flog = repo.file(path)

71

if not len(flog):

71

if not len(flog):

72

raise error.Abort(_('cannot censor file with no history'))

72

raise error.Abort(_('cannot censor file with no history'))

73

74

rev = scmutil.revsingle(repo, rev, rev).rev()

74

rev = scmutil.revsingle(repo, rev, rev).rev()

75

try:

75

try:

76

ctx = repo[rev]

76

ctx = repo[rev]

77

except KeyError:

77

except KeyError:

78

raise error.Abort(_('invalid revision identifier %s') % rev)

78

raise error.Abort(_('invalid revision identifier %s') % rev)

79

80

try:

80

try:

81

fctx = ctx.filectx(path)

81

fctx = ctx.filectx(path)

82

except error.LookupError:

82

except error.LookupError:

83

raise error.Abort(_('file does not exist at revision %s') % rev)

83

raise error.Abort(_('file does not exist at revision %s') % rev)

84

85

fnode = fctx.filenode()

85

fnode = fctx.filenode()

86

heads = []

86

heads = []

87

for headnode in repo.heads():

87

for headnode in repo.heads():

88

c = repo[headnode]

88

hc = repo[headnode]

89

if path in c and c.filenode(path) == fnode:

89

if path in hc and hc.filenode(path) == fnode:

90

heads.append(c)

90

heads.append(hc)

91

if heads:

91

if heads:

92

headlist = ', '.join([short(c.node()) for c in heads])

92

headlist = ', '.join([short(c.node()) for c in heads])

93

raise error.Abort(_('cannot censor file in heads (%s)') % headlist,

93

raise error.Abort(_('cannot censor file in heads (%s)') % headlist,

94

hint=_('clean/delete and commit first'))

94

hint=_('clean/delete and commit first'))

95

96

wp = wctx.parents()

96

wp = wctx.parents()

97

if ctx.node() in [p.node() for p in wp]:

97

if ctx.node() in [p.node() for p in wp]:

98

raise error.Abort(_('cannot censor working directory'),

98

raise error.Abort(_('cannot censor working directory'),

99

hint=_('clean/delete/update first'))

99

hint=_('clean/delete/update first'))

100

101

flogv = flog.version & 0xFFFF

101

flogv = flog.version & 0xFFFF

102

if flogv != revlog.REVLOGV1:

102

if flogv != revlog.REVLOGV1:

103

raise error.Abort(

103

raise error.Abort(

104

_('censor does not support revlog version %d') % (flogv,))

104

_('censor does not support revlog version %d') % (flogv,))

105

106

tombstone = revlog.packmeta({"censored": tombstone}, "")

106

tombstone = revlog.packmeta({"censored": tombstone}, "")

107

108

crev = fctx.filerev()

108

crev = fctx.filerev()

109

110

if len(tombstone) > flog.rawsize(crev):

110

if len(tombstone) > flog.rawsize(crev):

111

raise error.Abort(_(

111

raise error.Abort(_(

112

'censor tombstone must be no longer than censored data'))

112

'censor tombstone must be no longer than censored data'))

113

114

# Using two files instead of one makes it easy to rewrite entry-by-entry

114

# Using two files instead of one makes it easy to rewrite entry-by-entry

115

idxread = repo.svfs(flog.indexfile, 'r')

115

idxread = repo.svfs(flog.indexfile, 'r')

116

idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)

116

idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)

117

if flog.version & revlog.FLAG_INLINE_DATA:

117

if flog.version & revlog.FLAG_INLINE_DATA:

118

dataread, datawrite = idxread, idxwrite

118

dataread, datawrite = idxread, idxwrite

119

else:

119

else:

120

dataread = repo.svfs(flog.datafile, 'r')

120

dataread = repo.svfs(flog.datafile, 'r')

121

datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

121

datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

122

123

# Copy all revlog data up to the entry to be censored.

123

# Copy all revlog data up to the entry to be censored.

124

rio = revlog.revlogio()

124

rio = revlog.revlogio()

125

offset = flog.start(crev)

125

offset = flog.start(crev)

126

127

for chunk in util.filechunkiter(idxread, limit=crev * rio.size):

127

for chunk in util.filechunkiter(idxread, limit=crev * rio.size):

128

idxwrite.write(chunk)

128

idxwrite.write(chunk)

129

for chunk in util.filechunkiter(dataread, limit=offset):

129

for chunk in util.filechunkiter(dataread, limit=offset):

130

datawrite.write(chunk)

130

datawrite.write(chunk)

131

132

def rewriteindex(r, newoffs, newdata=None):

132

def rewriteindex(r, newoffs, newdata=None):

133

"""Rewrite the index entry with a new data offset and optional new data.

133

"""Rewrite the index entry with a new data offset and optional new data.

134

135

The newdata argument, if given, is a tuple of three positive integers:

135

The newdata argument, if given, is a tuple of three positive integers:

136

(new compressed, new uncompressed, added flag bits).

136

(new compressed, new uncompressed, added flag bits).

137

"""

137

"""

138

offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]

138

offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]

139

flags = revlog.gettype(offlags)

139

flags = revlog.gettype(offlags)

140

if newdata:

140

if newdata:

141

comp, uncomp, nflags = newdata

141

comp, uncomp, nflags = newdata

142

flags |= nflags

142

flags |= nflags

143

offlags = revlog.offset_type(newoffs, flags)

143

offlags = revlog.offset_type(newoffs, flags)

144

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

144

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

145

idxwrite.write(rio.packentry(e, None, flog.version, r))

145

idxwrite.write(rio.packentry(e, None, flog.version, r))

146

idxread.seek(rio.size, 1)

146

idxread.seek(rio.size, 1)

147

148

def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):

148

def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):

149

"""Write the given full text to the filelog with the given data offset.

149

"""Write the given full text to the filelog with the given data offset.

150

151

Returns:

151

Returns:

152

The integer number of data bytes written, for tracking data offsets.

152

The integer number of data bytes written, for tracking data offsets.

153

"""

153

"""

154

flag, compdata = flog.compress(data)

154

flag, compdata = flog.compress(data)

155

newcomp = len(flag) + len(compdata)

155

newcomp = len(flag) + len(compdata)

156

rewriteindex(r, offs, (newcomp, len(data), nflags))

156

rewriteindex(r, offs, (newcomp, len(data), nflags))

157

datawrite.write(flag)

157

datawrite.write(flag)

158

datawrite.write(compdata)

158

datawrite.write(compdata)

159

dataread.seek(flog.length(r), 1)

159

dataread.seek(flog.length(r), 1)

160

return newcomp

160

return newcomp

161

162

# Rewrite censored revlog entry with (padded) tombstone data.

162

# Rewrite censored revlog entry with (padded) tombstone data.

163

pad = ' ' * (flog.rawsize(crev) - len(tombstone))

163

pad = ' ' * (flog.rawsize(crev) - len(tombstone))

164

offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

164

offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

165

166

# Rewrite all following filelog revisions fixing up offsets and deltas.

166

# Rewrite all following filelog revisions fixing up offsets and deltas.

167

for srev in pycompat.xrange(crev + 1, len(flog)):

167

for srev in pycompat.xrange(crev + 1, len(flog)):

168

if crev in flog.parentrevs(srev):

168

if crev in flog.parentrevs(srev):

169

# Immediate children of censored node must be re-added as fulltext.

169

# Immediate children of censored node must be re-added as fulltext.

170

try:

170

try:

171

revdata = flog.revision(srev)

171

revdata = flog.revision(srev)

172

except error.CensoredNodeError as e:

172

except error.CensoredNodeError as e:

173

revdata = e.tombstone

173

revdata = e.tombstone

174

dlen = rewrite(srev, offset, revdata)

174

dlen = rewrite(srev, offset, revdata)

175

else:

175

else:

176

# Copy any other revision data verbatim after fixing up the offset.

176

# Copy any other revision data verbatim after fixing up the offset.

177

rewriteindex(srev, offset)

177

rewriteindex(srev, offset)

178

dlen = flog.length(srev)

178

dlen = flog.length(srev)

179

for chunk in util.filechunkiter(dataread, limit=dlen):

179

for chunk in util.filechunkiter(dataread, limit=dlen):

180

datawrite.write(chunk)

180

datawrite.write(chunk)

181

offset += dlen

181

offset += dlen

182

183

idxread.close()

183

idxread.close()

184

idxwrite.close()

184

idxwrite.close()

185

if dataread is not idxread:

185

if dataread is not idxread:

186

dataread.close()

186

dataread.close()

187

datawrite.close()

187

datawrite.close()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
             #
             # This extension enables removal of file content at a given revision,
             # rewriting the data/metadata of successive revisions to preserve revision log
             # integrity.
             """erase file content at a given revision
             The censor command instructs Mercurial to erase all content of a file at a given
             revision *without updating the changeset hash.* This allows existing history to
             remain valid while preventing future clones/pulls from receiving the erased
             data.
             Typical uses for censor are due to security or legal requirements, including::
              * Passwords, private keys, cryptographic material
              * Licensed data/code/libraries for which the license has expired
              * Personally Identifiable Information or other private data
             Censored nodes can interrupt mercurial's typical operation whenever the excised
             data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
             simply fail when asked to produce censored data. Others, like ``hg verify`` and
             ``hg update``, must be capable of tolerating censored data to continue to
             function in a meaningful way. Such commands only tolerate censored file
             revisions if they are allowed by the "censor.policy=ignore" config option.
             """
             from __future__ import absolute_import
             from mercurial.i18n import _
             from mercurial.node import short
             from mercurial import (
                 error,
                 pycompat,
                 registrar,
                 revlog,
                 scmutil,
                 util,
             )
             # Command table for this extension; it is handed to registrar.command()
             # on the next line.
             cmdtable = {}
             # Decorator object applied to censor() below to declare the 'censor'
             # command together with its options and synopsis.
             command = registrar.command(cmdtable)
             # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
             # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
             # be specifying the version(s) of Mercurial they are tested with, or
             # leave the attribute unspecified.
             testedwith = 'ships-with-hg-core'
             @command('censor',
                 [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
                  ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
                 _('-r REV [-t TEXT] [FILE]'))
             def censor(ui, repo, path, rev='', tombstone='', **opts):
                 # Entry point for the 'censor' command. It only takes the locks and
                 # delegates; all validation and the revlog rewrite live in
                 # _docensor(). Documented with comments rather than a docstring so
                 # that the function's __doc__ stays unset, as in the original.
                 #
                 # The working-directory lock is acquired first, then the store
                 # lock; both are held until _docensor() returns or raises.
                 with repo.wlock():
                     with repo.lock():
                         return _docensor(ui, repo, path, rev, tombstone, **opts)
             def _docensor(ui, repo, path, rev='', tombstone='', **opts):
                 """Replace the stored content of one file revision with a tombstone.

                 After validating the request, the filelog's index file (and its
                 separate data file, when the revlog is not inline) is rewritten
                 entry by entry into files opened with ``atomictemp=True``. Those
                 files are committed with close() only if the whole rewrite
                 succeeds; on any failure they are discarded.

                 Arguments:
                   ui: unused by this function.
                   repo: the repository; censor() calls this with wlock and lock held.
                   path: a single explicit file name (patterns are rejected).
                   rev: changeset revision whose version of ``path`` is censored.
                   tombstone: replacement text, stored as the "censored" metadata
                     value of the rewritten revision.
                   opts: accepted for signature compatibility with censor(); unused.

                 Raises error.Abort when path or rev is empty, when path is not one
                 explicit file, when the file has no history or does not exist at
                 rev, when the targeted file revision is still present in a
                 repository head or rev is a working directory parent, when the
                 filelog is not REVLOGV1, or when the packed tombstone is longer than
                 the data it replaces.
                 """
                 if not path:
                     raise error.Abort(_('must specify file path to censor'))
                 if not rev:
                     raise error.Abort(_('must specify revision to censor'))

                 wctx = repo[None]

                 m = scmutil.match(wctx, (path,))
                 if m.anypats() or len(m.files()) != 1:
                     raise error.Abort(_('can only specify an explicit filename'))
                 path = m.files()[0]
                 flog = repo.file(path)
                 if not len(flog):
                     raise error.Abort(_('cannot censor file with no history'))

                 # From here on, rev is the integer revision number of the changeset.
                 rev = scmutil.revsingle(repo, rev, rev).rev()
                 try:
                     ctx = repo[rev]
                 except KeyError:
                     raise error.Abort(_('invalid revision identifier %s') % rev)

                 try:
                     fctx = ctx.filectx(path)
                 except error.LookupError:
                     raise error.Abort(_('file does not exist at revision %s') % rev)

                 # Refuse to censor a file revision that a head changeset still uses.
                 fnode = fctx.filenode()
                 heads = []
                 for headnode in repo.heads():
                     hc = repo[headnode]
                     if path in hc and hc.filenode(path) == fnode:
                         heads.append(hc)
                 if heads:
                     headlist = ', '.join([short(c.node()) for c in heads])
                     raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
                         hint=_('clean/delete and commit first'))

                 wp = wctx.parents()
                 if ctx.node() in [p.node() for p in wp]:
                     raise error.Abort(_('cannot censor working directory'),
                         hint=_('clean/delete/update first'))

                 # The low 16 bits of flog.version are compared against REVLOGV1;
                 # the full value is tested for FLAG_INLINE_DATA further down.
                 flogv = flog.version & 0xFFFF
                 if flogv != revlog.REVLOGV1:
                     raise error.Abort(
                         _('censor does not support revlog version %d') % (flogv,))

                 tombstone = revlog.packmeta({"censored": tombstone}, "")

                 crev = fctx.filerev()

                 if len(tombstone) > flog.rawsize(crev):
                     raise error.Abort(_(
                         'censor tombstone must be no longer than censored data'))

                 rio = revlog.revlogio()

                 # Using two files instead of one makes it easy to rewrite entry-by-entry
                 idxread = repo.svfs(flog.indexfile, 'r')
                 idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
                 if flog.version & revlog.FLAG_INLINE_DATA:
                     # Inline revlog: index entries and data share one file, so the
                     # "data" handles are simply the index handles.
                     dataread, datawrite = idxread, idxwrite
                 else:
                     dataread = repo.svfs(flog.datafile, 'r')
                     datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

                 def rewriteindex(r, newoffs, newdata=None):
                     """Rewrite the index entry with a new data offset and optional new data.

                     The newdata argument, if given, is a tuple of three positive integers:
                     (new compressed, new uncompressed, added flag bits).
                     """
                     offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
                     flags = revlog.gettype(offlags)
                     if newdata:
                         comp, uncomp, nflags = newdata
                         flags |= nflags
                     offlags = revlog.offset_type(newoffs, flags)
                     # NOTE(review): r is written in the slot the original entry
                     # used for ``base`` (which is unpacked above but never used),
                     # including for entries whose data is copied verbatim --
                     # confirm this is intended for delta-encoded revisions.
                     e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
                     idxwrite.write(rio.packentry(e, None, flog.version, r))
                     # Step the reader past the original entry just replaced.
                     idxread.seek(rio.size, 1)

                 def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
                     """Write the given full text to the filelog with the given data offset.

                     Returns:
                         The integer number of data bytes written, for tracking data offsets.
                     """
                     flag, compdata = flog.compress(data)
                     newcomp = len(flag) + len(compdata)
                     rewriteindex(r, offs, (newcomp, len(data), nflags))
                     datawrite.write(flag)
                     datawrite.write(compdata)
                     # Skip the original stored chunk for r in the reader.
                     dataread.seek(flog.length(r), 1)
                     return newcomp

                 rewritten = False
                 try:
                     # Copy all revlog data up to the entry to be censored.
                     offset = flog.start(crev)
                     for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
                         idxwrite.write(chunk)
                     for chunk in util.filechunkiter(dataread, limit=offset):
                         datawrite.write(chunk)

                     # Rewrite censored revlog entry with (padded) tombstone data.
                     pad = ' ' * (flog.rawsize(crev) - len(tombstone))
                     offset += rewrite(crev, offset, tombstone + pad,
                                       revlog.REVIDX_ISCENSORED)

                     # Rewrite all following filelog revisions fixing up offsets and
                     # deltas.
                     for srev in pycompat.xrange(crev + 1, len(flog)):
                         if crev in flog.parentrevs(srev):
                             # Immediate children of censored node must be re-added
                             # as fulltext.
                             try:
                                 revdata = flog.revision(srev)
                             except error.CensoredNodeError as e:
                                 revdata = e.tombstone
                             dlen = rewrite(srev, offset, revdata)
                         else:
                             # Copy any other revision data verbatim after fixing up
                             # the offset.
                             rewriteindex(srev, offset)
                             dlen = flog.length(srev)
                             for chunk in util.filechunkiter(dataread, limit=dlen):
                                 datawrite.write(chunk)
                         offset += dlen
                     rewritten = True
                 finally:
                     # On success this is the original close order (reader, then
                     # writer, index before data). On failure the temp files are
                     # discarded so a partially rewritten revlog never replaces the
                     # real one, and the read handles are still released.
                     idxread.close()
                     if rewritten:
                         idxwrite.close()
                     else:
                         idxwrite.discard()
                     if dataread is not idxread:
                         dataread.close()
                         if rewritten:
                             datawrite.close()
                         else:
                             datawrite.discard()