upstream/mercurial-mirror Commit - r39651:a658f97c

1

2

#

2

#

3

# This extension enables removal of file content at a given revision,

3

# This extension enables removal of file content at a given revision,

4

# rewriting the data/metadata of successive revisions to preserve revision log

4

# rewriting the data/metadata of successive revisions to preserve revision log

5

# integrity.

5

# integrity.

6

7

"""erase file content at a given revision

7

"""erase file content at a given revision

8

9

The censor command instructs Mercurial to erase all content of a file at a given

9

The censor command instructs Mercurial to erase all content of a file at a given

10

revision *without updating the changeset hash.* This allows existing history to

10

revision *without updating the changeset hash.* This allows existing history to

11

remain valid while preventing future clones/pulls from receiving the erased

11

remain valid while preventing future clones/pulls from receiving the erased

12

data.

12

data.

13

14

Typical uses for censor are due to security or legal requirements, including::

14

Typical uses for censor are due to security or legal requirements, including::

15

16

* Passwords, private keys, cryptographic material

16

* Passwords, private keys, cryptographic material

17

* Licensed data/code/libraries for which the license has expired

17

* Licensed data/code/libraries for which the license has expired

18

* Personally Identifiable Information or other private data

18

* Personally Identifiable Information or other private data

19

20

Censored nodes can interrupt mercurial's typical operation whenever the excised

20

Censored nodes can interrupt mercurial's typical operation whenever the excised

21

data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,

21

data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,

22

simply fail when asked to produce censored data. Others, like ``hg verify`` and

22

simply fail when asked to produce censored data. Others, like ``hg verify`` and

23

``hg update``, must be capable of tolerating censored data to continue to

23

``hg update``, must be capable of tolerating censored data to continue to

24

function in a meaningful way. Such commands only tolerate censored file

24

function in a meaningful way. Such commands only tolerate censored file

25

revisions if they are allowed by the "censor.policy=ignore" config option.

25

revisions if they are allowed by the "censor.policy=ignore" config option.

26

"""

26

"""

27

28

from __future__ import absolute_import

28

from __future__ import absolute_import

29

30

from mercurial.i18n import _

30

from mercurial.i18n import _

31

from mercurial.node import short

31

from mercurial.node import short

32

33

from mercurial import (

33

from mercurial import (

34

error,

34

error,

35

pycompat,

35

pycompat,

36

registrar,

36

registrar,

37

revlog,

37

revlog,

38

scmutil,

38

scmutil,

39

util,

39

util,

40

)

40

)

41

42

cmdtable = {}

42

cmdtable = {}

43

command = registrar.command(cmdtable)

43

command = registrar.command(cmdtable)

44

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for

44

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for

45

# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should

45

# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should

46

# be specifying the version(s) of Mercurial they are tested with, or

46

# be specifying the version(s) of Mercurial they are tested with, or

47

# leave the attribute unspecified.

47

# leave the attribute unspecified.

48

testedwith = 'ships-with-hg-core'

48

testedwith = 'ships-with-hg-core'

49

50

@command('censor',

50

@command('censor',

51

[('r', 'rev', '', _('censor file from specified revision'), _('REV')),

51

[('r', 'rev', '', _('censor file from specified revision'), _('REV')),

52

('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],

52

('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],

53

_('-r REV [-t TEXT] [FILE]'))

53

_('-r REV [-t TEXT] [FILE]'))

54

def censor(ui, repo, path, rev='', tombstone='', **opts):

54

def censor(ui, repo, path, rev='', tombstone='', **opts):

55

with repo.wlock(), repo.lock():

55

with repo.wlock(), repo.lock():

56

return _docensor(ui, repo, path, rev, tombstone, **opts)

56

return _docensor(ui, repo, path, rev, tombstone, **opts)

57

58

def _docensor(ui, repo, path, rev='', tombstone='', **opts):

58

def _docensor(ui, repo, path, rev='', tombstone='', **opts):

59

if not path:

59

if not path:

60

raise error.Abort(_('must specify file path to censor'))

60

raise error.Abort(_('must specify file path to censor'))

61

if not rev:

61

if not rev:

62

raise error.Abort(_('must specify revision to censor'))

62

raise error.Abort(_('must specify revision to censor'))

63

64

wctx = repo[None]

64

wctx = repo[None]

65

66

m = scmutil.match(wctx, (path,))

66

m = scmutil.match(wctx, (path,))

67

if m.anypats() or len(m.files()) != 1:

67

if m.anypats() or len(m.files()) != 1:

68

raise error.Abort(_('can only specify an explicit filename'))

68

raise error.Abort(_('can only specify an explicit filename'))

69

path = m.files()[0]

69

path = m.files()[0]

70

flog = repo.file(path)

70

flog = repo.file(path)

71

if not len(flog):

71

if not len(flog):

72

raise error.Abort(_('cannot censor file with no history'))

72

raise error.Abort(_('cannot censor file with no history'))

73

74

rev = scmutil.revsingle(repo, rev, rev).rev()

74

rev = scmutil.revsingle(repo, rev, rev).rev()

75

try:

75

try:

76

ctx = repo[rev]

76

ctx = repo[rev]

77

except KeyError:

77

except KeyError:

78

raise error.Abort(_('invalid revision identifier %s') % rev)

78

raise error.Abort(_('invalid revision identifier %s') % rev)

79

80

try:

80

try:

81

fctx = ctx.filectx(path)

81

fctx = ctx.filectx(path)

82

except error.LookupError:

82

except error.LookupError:

83

raise error.Abort(_('file does not exist at revision %s') % rev)

83

raise error.Abort(_('file does not exist at revision %s') % rev)

84

85

fnode = fctx.filenode()

85

fnode = fctx.filenode()

86

headctxs = [repo[c] for c in repo.heads()]

86

heads = []

87

heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]

87

for headnode in repo.heads():

88

c = repo[headnode]

89

if path in c and c.filenode(path) == fnode:

90

heads.append(c)

88

if heads:

91

if heads:

89

headlist = ', '.join([short(c.node()) for c in heads])

92

headlist = ', '.join([short(c.node()) for c in heads])

90

raise error.Abort(_('cannot censor file in heads (%s)') % headlist,

93

raise error.Abort(_('cannot censor file in heads (%s)') % headlist,

91

hint=_('clean/delete and commit first'))

94

hint=_('clean/delete and commit first'))

92

95

93

wp = wctx.parents()

96

wp = wctx.parents()

94

if ctx.node() in [p.node() for p in wp]:

97

if ctx.node() in [p.node() for p in wp]:

95

raise error.Abort(_('cannot censor working directory'),

98

raise error.Abort(_('cannot censor working directory'),

96

hint=_('clean/delete/update first'))

99

hint=_('clean/delete/update first'))

97

100

98

flogv = flog.version & 0xFFFF

101

flogv = flog.version & 0xFFFF

99

if flogv != revlog.REVLOGV1:

102

if flogv != revlog.REVLOGV1:

100

raise error.Abort(

103

raise error.Abort(

101

_('censor does not support revlog version %d') % (flogv,))

104

_('censor does not support revlog version %d') % (flogv,))

102

105

103

tombstone = revlog.packmeta({"censored": tombstone}, "")

106

tombstone = revlog.packmeta({"censored": tombstone}, "")

104

107

105

crev = fctx.filerev()

108

crev = fctx.filerev()

106

109

107

if len(tombstone) > flog.rawsize(crev):

110

if len(tombstone) > flog.rawsize(crev):

108

raise error.Abort(_(

111

raise error.Abort(_(

109

'censor tombstone must be no longer than censored data'))

112

'censor tombstone must be no longer than censored data'))

110

113

111

# Using two files instead of one makes it easy to rewrite entry-by-entry

114

# Using two files instead of one makes it easy to rewrite entry-by-entry

112

idxread = repo.svfs(flog.indexfile, 'r')

115

idxread = repo.svfs(flog.indexfile, 'r')

113

idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)

116

idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)

114

if flog.version & revlog.FLAG_INLINE_DATA:

117

if flog.version & revlog.FLAG_INLINE_DATA:

115

dataread, datawrite = idxread, idxwrite

118

dataread, datawrite = idxread, idxwrite

116

else:

119

else:

117

dataread = repo.svfs(flog.datafile, 'r')

120

dataread = repo.svfs(flog.datafile, 'r')

118

datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

121

datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

119

122

120

# Copy all revlog data up to the entry to be censored.

123

# Copy all revlog data up to the entry to be censored.

121

rio = revlog.revlogio()

124

rio = revlog.revlogio()

122

offset = flog.start(crev)

125

offset = flog.start(crev)

123

126

124

for chunk in util.filechunkiter(idxread, limit=crev * rio.size):

127

for chunk in util.filechunkiter(idxread, limit=crev * rio.size):

125

idxwrite.write(chunk)

128

idxwrite.write(chunk)

126

for chunk in util.filechunkiter(dataread, limit=offset):

129

for chunk in util.filechunkiter(dataread, limit=offset):

127

datawrite.write(chunk)

130

datawrite.write(chunk)

128

131

129

def rewriteindex(r, newoffs, newdata=None):

132

def rewriteindex(r, newoffs, newdata=None):

130

"""Rewrite the index entry with a new data offset and optional new data.

133

"""Rewrite the index entry with a new data offset and optional new data.

131

134

132

The newdata argument, if given, is a tuple of three positive integers:

135

The newdata argument, if given, is a tuple of three positive integers:

133

(new compressed, new uncompressed, added flag bits).

136

(new compressed, new uncompressed, added flag bits).

134

"""

137

"""

135

offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]

138

offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]

136

flags = revlog.gettype(offlags)

139

flags = revlog.gettype(offlags)

137

if newdata:

140

if newdata:

138

comp, uncomp, nflags = newdata

141

comp, uncomp, nflags = newdata

139

flags |= nflags

142

flags |= nflags

140

offlags = revlog.offset_type(newoffs, flags)

143

offlags = revlog.offset_type(newoffs, flags)

141

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

144

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

142

idxwrite.write(rio.packentry(e, None, flog.version, r))

145

idxwrite.write(rio.packentry(e, None, flog.version, r))

143

idxread.seek(rio.size, 1)

146

idxread.seek(rio.size, 1)

144

147

145

def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):

148

def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):

146

"""Write the given full text to the filelog with the given data offset.

149

"""Write the given full text to the filelog with the given data offset.

147

150

148

Returns:

151

Returns:

149

The integer number of data bytes written, for tracking data offsets.

152

The integer number of data bytes written, for tracking data offsets.

150

"""

153

"""

151

flag, compdata = flog.compress(data)

154

flag, compdata = flog.compress(data)

152

newcomp = len(flag) + len(compdata)

155

newcomp = len(flag) + len(compdata)

153

rewriteindex(r, offs, (newcomp, len(data), nflags))

156

rewriteindex(r, offs, (newcomp, len(data), nflags))

154

datawrite.write(flag)

157

datawrite.write(flag)

155

datawrite.write(compdata)

158

datawrite.write(compdata)

156

dataread.seek(flog.length(r), 1)

159

dataread.seek(flog.length(r), 1)

157

return newcomp

160

return newcomp

158

161

159

# Rewrite censored revlog entry with (padded) tombstone data.

162

# Rewrite censored revlog entry with (padded) tombstone data.

160

pad = ' ' * (flog.rawsize(crev) - len(tombstone))

163

pad = ' ' * (flog.rawsize(crev) - len(tombstone))

161

offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

164

offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

162

165

163

# Rewrite all following filelog revisions fixing up offsets and deltas.

166

# Rewrite all following filelog revisions fixing up offsets and deltas.

164

for srev in pycompat.xrange(crev + 1, len(flog)):

167

for srev in pycompat.xrange(crev + 1, len(flog)):

165

if crev in flog.parentrevs(srev):

168

if crev in flog.parentrevs(srev):

166

# Immediate children of censored node must be re-added as fulltext.

169

# Immediate children of censored node must be re-added as fulltext.

167

try:

170

try:

168

revdata = flog.revision(srev)

171

revdata = flog.revision(srev)

169

except error.CensoredNodeError as e:

172

except error.CensoredNodeError as e:

170

revdata = e.tombstone

173

revdata = e.tombstone

171

dlen = rewrite(srev, offset, revdata)

174

dlen = rewrite(srev, offset, revdata)

172

else:

175

else:

173

# Copy any other revision data verbatim after fixing up the offset.

176

# Copy any other revision data verbatim after fixing up the offset.

174

rewriteindex(srev, offset)

177

rewriteindex(srev, offset)

175

dlen = flog.length(srev)

178

dlen = flog.length(srev)

176

for chunk in util.filechunkiter(dataread, limit=dlen):

179

for chunk in util.filechunkiter(dataread, limit=dlen):

177

datawrite.write(chunk)

180

datawrite.write(chunk)

178

offset += dlen

181

offset += dlen

179

182

180

idxread.close()

183

idxread.close()

181

idxwrite.close()

184

idxwrite.close()

182

if dataread is not idxread:

185

if dataread is not idxread:

183

dataread.close()

186

dataread.close()

184

datawrite.close()

187

datawrite.close()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
             #
             # This extension enables removal of file content at a given revision,
             # rewriting the data/metadata of successive revisions to preserve revision log
             # integrity.
             """erase file content at a given revision
             The censor command instructs Mercurial to erase all content of a file at a given
             revision *without updating the changeset hash.* This allows existing history to
             remain valid while preventing future clones/pulls from receiving the erased
             data.
             Typical uses for censor are due to security or legal requirements, including::
              * Passwords, private keys, cryptographic material
              * Licensed data/code/libraries for which the license has expired
              * Personally Identifiable Information or other private data
             Censored nodes can interrupt mercurial's typical operation whenever the excised
             data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
             simply fail when asked to produce censored data. Others, like ``hg verify`` and
             ``hg update``, must be capable of tolerating censored data to continue to
             function in a meaningful way. Such commands only tolerate censored file
             revisions if they are allowed by the "censor.policy=ignore" config option.
             """
             from __future__ import absolute_import
             from mercurial.i18n import _
             from mercurial.node import short
             from mercurial import (
                 error,
                 pycompat,
                 registrar,
                 revlog,
                 scmutil,
                 util,
             )
             # Table that collects this extension's commands; ``command`` is the
             # decorator (built from that table) applied to ``censor`` below.
             cmdtable = {}
             command = registrar.command(cmdtable)
             # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
             # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
             # be specifying the version(s) of Mercurial they are tested with, or
             # leave the attribute unspecified.
             testedwith = 'ships-with-hg-core'
             @command('censor',
                 [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
                  ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
                 _('-r REV [-t TEXT] [FILE]'))
             def censor(ui, repo, path, rev='', tombstone='', **opts):
                 # Command entry point: take the working-directory lock, then the
                 # store lock, and delegate all the real work to _docensor().
                 # (Kept as comments rather than a docstring so the command's
                 # help text is not changed.)
                 with repo.wlock():
                     with repo.lock():
                         result = _docensor(ui, repo, path, rev, tombstone, **opts)
                 return result
             def _docensor(ui, repo, path, rev='', tombstone='', **opts):
                 """Replace one file revision's content with a tombstone in its filelog.

                 The filelog's index (and data) file is rewritten entry by entry
                 into atomictemp files: entries before the censored one are copied
                 unchanged, the censored entry is replaced by the padded tombstone,
                 and every later entry gets a corrected data offset.

                 Raises error.Abort when: no path or no revision is given; the path
                 is a pattern or does not resolve to exactly one file; the file has
                 no history; the file does not exist at the revision; the same file
                 node is present in a repository head; the revision is a parent of
                 the working directory; the filelog is not REVLOGV1; or the packed
                 tombstone is longer than the data being censored.

                 NOTE(review): the four file handles are only closed on the success
                 path at the end; an exception mid-rewrite leaves them to be
                 cleaned up elsewhere -- confirm that is acceptable.
                 """
                 if not path:
                     raise error.Abort(_('must specify file path to censor'))
                 if not rev:
                     raise error.Abort(_('must specify revision to censor'))
                 wctx = repo[None]
                 # Only a single, explicitly named file is accepted (no patterns).
                 m = scmutil.match(wctx, (path,))
                 if m.anypats() or len(m.files()) != 1:
                     raise error.Abort(_('can only specify an explicit filename'))
                 path = m.files()[0]
                 flog = repo.file(path)
                 if not len(flog):
                     raise error.Abort(_('cannot censor file with no history'))
                 # From here on ``rev`` is the resolved revision number, not the
                 # user-supplied string.
                 rev = scmutil.revsingle(repo, rev, rev).rev()
                 try:
                     ctx = repo[rev]
                 except KeyError:
                     raise error.Abort(_('invalid revision identifier %s') % rev)
                 try:
                     fctx = ctx.filectx(path)
                 except error.LookupError:
                     raise error.Abort(_('file does not exist at revision %s') % rev)
                 # Collect every head whose copy of ``path`` is the very file node
                 # being censored; censoring is refused if there is any.
                 fnode = fctx.filenode()
-                headctxs = [repo[c] for c in repo.heads()]
+                heads = []
-                heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
+                for headnode in repo.heads():
+                    c = repo[headnode]
+                    if path in c and c.filenode(path) == fnode:
+                        heads.append(c)
                 if heads:
                     headlist = ', '.join([short(c.node()) for c in heads])
                     raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
                         hint=_('clean/delete and commit first'))
                 # Refuse when the target changeset is a working directory parent.
                 wp = wctx.parents()
                 if ctx.node() in [p.node() for p in wp]:
                     raise error.Abort(_('cannot censor working directory'),
                         hint=_('clean/delete/update first'))
                 # Only the low 16 bits of the version word are compared with
                 # REVLOGV1; the remaining bits are masked off here.
                 flogv = flog.version & 0xFFFF
                 if flogv != revlog.REVLOGV1:
                     raise error.Abort(
                         _('censor does not support revlog version %d') % (flogv,))
                 # Pack the user's tombstone text as metadata under the "censored"
                 # key, with an empty text body.
                 tombstone = revlog.packmeta({"censored": tombstone}, "")
                 crev = fctx.filerev()
                 # The tombstone is later padded up to the raw size of the censored
                 # revision, so it must not already exceed that size.
                 if len(tombstone) > flog.rawsize(crev):
                     raise error.Abort(_(
                         'censor tombstone must be no longer than censored data'))
                 # Using two files instead of one makes it easy to rewrite entry-by-entry
                 idxread = repo.svfs(flog.indexfile, 'r')
                 idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
                 # With the inline-data flag set, the index handles double as the
                 # data handles; otherwise the separate data file is opened too.
                 if flog.version & revlog.FLAG_INLINE_DATA:
                     dataread, datawrite = idxread, idxwrite
                 else:
                     dataread = repo.svfs(flog.datafile, 'r')
                     datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
                 # Copy all revlog data up to the entry to be censored.
                 rio = revlog.revlogio()
                 # ``offset`` tracks the data offset to record for the next entry
                 # written; it starts at the censored entry's original start.
                 offset = flog.start(crev)
                 for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
                     idxwrite.write(chunk)
                 for chunk in util.filechunkiter(dataread, limit=offset):
                     datawrite.write(chunk)
                 def rewriteindex(r, newoffs, newdata=None):
                     """Rewrite the index entry with a new data offset and optional new data.
                     The newdata argument, if given, is a tuple of three positive integers:
                     (new compressed, new uncompressed, added flag bits).
                     """
                     offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
                     flags = revlog.gettype(offlags)
                     if newdata:
                         comp, uncomp, nflags = newdata
                         flags |= nflags
                     offlags = revlog.offset_type(newoffs, flags)
                     # NOTE(review): the original ``base`` is discarded and the entry
                     # is always written with base == r, including for entries
                     # whose data is copied verbatim -- confirm this is intended.
                     e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
                     idxwrite.write(rio.packentry(e, None, flog.version, r))
                     # Advance the read handle past the original entry (whence=1 is
                     # a seek relative to the current position).
                     idxread.seek(rio.size, 1)
                 def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
                     """Write the given full text to the filelog with the given data offset.
                     Returns:
                         The integer number of data bytes written, for tracking data offsets.
                     """
                     flag, compdata = flog.compress(data)
                     newcomp = len(flag) + len(compdata)
                     # The index entry is written before the data chunk.
                     rewriteindex(r, offs, (newcomp, len(data), nflags))
                     datawrite.write(flag)
                     datawrite.write(compdata)
                     # Skip the original stored chunk for r in the read handle.
                     dataread.seek(flog.length(r), 1)
                     return newcomp
                 # Rewrite censored revlog entry with (padded) tombstone data.
                 pad = ' ' * (flog.rawsize(crev) - len(tombstone))
                 offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
                 # Rewrite all following filelog revisions fixing up offsets and deltas.
                 for srev in pycompat.xrange(crev + 1, len(flog)):
                     if crev in flog.parentrevs(srev):
                         # Immediate children of censored node must be re-added as fulltext.
                         try:
                             revdata = flog.revision(srev)
                         except error.CensoredNodeError as e:
                             # The child is itself censored: keep its tombstone as
                             # the text to store.
                             revdata = e.tombstone
                         dlen = rewrite(srev, offset, revdata)
                     else:
                         # Copy any other revision data verbatim after fixing up the offset.
                         rewriteindex(srev, offset)
                         dlen = flog.length(srev)
                         for chunk in util.filechunkiter(dataread, limit=dlen):
                             datawrite.write(chunk)
                     offset += dlen
                 # Close the handles; in the inline case the data handles are the
                 # index handles and must not be closed a second time.
                 idxread.close()
                 idxwrite.close()
                 if dataread is not idxread:
                     dataread.close()
                     datawrite.close()