Show More
@@ -1,162 +1,161 b'' | |||||
1 | # Copyright (C) 2015 - Mike Edgar <adgar@google.com> |
|
1 | # Copyright (C) 2015 - Mike Edgar <adgar@google.com> | |
2 | # |
|
2 | # | |
3 | # This extension enables removal of file content at a given revision, |
|
3 | # This extension enables removal of file content at a given revision, | |
4 | # rewriting the data/metadata of successive revisions to preserve revision log |
|
4 | # rewriting the data/metadata of successive revisions to preserve revision log | |
5 | # integrity. |
|
5 | # integrity. | |
6 |
|
6 | |||
7 | """erase file content at a given revision |
|
7 | """erase file content at a given revision | |
8 |
|
8 | |||
9 | The censor command instructs Mercurial to erase all content of a file at a given |
|
9 | The censor command instructs Mercurial to erase all content of a file at a given | |
10 | revision *without updating the changeset hash.* This allows existing history to |
|
10 | revision *without updating the changeset hash.* This allows existing history to | |
11 | remain valid while preventing future clones/pulls from receiving the erased |
|
11 | remain valid while preventing future clones/pulls from receiving the erased | |
12 | data. |
|
12 | data. | |
13 |
|
13 | |||
14 | Typical uses for censor are due to security or legal requirements, including:: |
|
14 | Typical uses for censor are due to security or legal requirements, including:: | |
15 |
|
15 | |||
16 | * Passwords, private keys, cryptographic material |

16 | * Passwords, private keys, cryptographic material | |
17 | * Licensed data/code/libraries for which the license has expired |
|
17 | * Licensed data/code/libraries for which the license has expired | |
18 | * Personally Identifiable Information or other private data |
|
18 | * Personally Identifiable Information or other private data | |
19 |
|
19 | |||
20 | Censored nodes can interrupt mercurial's typical operation whenever the excised |
|
20 | Censored nodes can interrupt mercurial's typical operation whenever the excised | |
21 | data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, |
|
21 | data needs to be materialized. Some commands, like ``hg cat``/``hg revert``, | |
22 | simply fail when asked to produce censored data. Others, like ``hg verify`` and |
|
22 | simply fail when asked to produce censored data. Others, like ``hg verify`` and | |
23 | ``hg update``, must be capable of tolerating censored data to continue to |
|
23 | ``hg update``, must be capable of tolerating censored data to continue to | |
24 | function in a meaningful way. Such commands only tolerate censored file |
|
24 | function in a meaningful way. Such commands only tolerate censored file | |
25 |
revisions if they are allowed by the policy |
|
25 | revisions if they are allowed by the "censor.policy=ignore" config option. | |
26 | config option. |
|
|||
27 | """ |
|
26 | """ | |
28 |
|
27 | |||
29 | from mercurial.node import short |
|
28 | from mercurial.node import short | |
30 | from mercurial import cmdutil, error, filelog, revlog, scmutil, util |
|
29 | from mercurial import cmdutil, error, filelog, revlog, scmutil, util | |
31 | from mercurial.i18n import _ |
|
30 | from mercurial.i18n import _ | |
32 |
|
31 | |||
33 | cmdtable = {} |
|
32 | cmdtable = {} | |
34 | command = cmdutil.command(cmdtable) |
|
33 | command = cmdutil.command(cmdtable) | |
35 | testedwith = 'internal' |
|
34 | testedwith = 'internal' | |
36 |
|
35 | |||
37 | @command('censor', |
|
36 | @command('censor', | |
38 | [('r', 'rev', '', _('censor file from specified revision'), _('REV')), |
|
37 | [('r', 'rev', '', _('censor file from specified revision'), _('REV')), | |
39 | ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))], |
|
38 | ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))], | |
40 | _('-r REV [-t TEXT] [FILE]')) |
|
39 | _('-r REV [-t TEXT] [FILE]')) | |
41 | def censor(ui, repo, path, rev='', tombstone='', **opts): |
|
40 | def censor(ui, repo, path, rev='', tombstone='', **opts): | |
42 | if not path: |
|
41 | if not path: | |
43 | raise util.Abort(_('must specify file path to censor')) |
|
42 | raise util.Abort(_('must specify file path to censor')) | |
44 | if not rev: |
|
43 | if not rev: | |
45 | raise util.Abort(_('must specify revision to censor')) |
|
44 | raise util.Abort(_('must specify revision to censor')) | |
46 |
|
45 | |||
47 | flog = repo.file(path) |
|
46 | flog = repo.file(path) | |
48 | if not len(flog): |
|
47 | if not len(flog): | |
49 | raise util.Abort(_('cannot censor file with no history')) |
|
48 | raise util.Abort(_('cannot censor file with no history')) | |
50 |
|
49 | |||
51 | rev = scmutil.revsingle(repo, rev, rev).rev() |
|
50 | rev = scmutil.revsingle(repo, rev, rev).rev() | |
52 | try: |
|
51 | try: | |
53 | ctx = repo[rev] |
|
52 | ctx = repo[rev] | |
54 | except KeyError: |
|
53 | except KeyError: | |
55 | raise util.Abort(_('invalid revision identifier %s') % rev) |
|
54 | raise util.Abort(_('invalid revision identifier %s') % rev) | |
56 |
|
55 | |||
57 | try: |
|
56 | try: | |
58 | fctx = ctx.filectx(path) |
|
57 | fctx = ctx.filectx(path) | |
59 | except error.LookupError: |
|
58 | except error.LookupError: | |
60 | raise util.Abort(_('file does not exist at revision %s') % rev) |
|
59 | raise util.Abort(_('file does not exist at revision %s') % rev) | |
61 |
|
60 | |||
62 | fnode = fctx.filenode() |
|
61 | fnode = fctx.filenode() | |
63 | headctxs = [repo[c] for c in repo.heads()] |
|
62 | headctxs = [repo[c] for c in repo.heads()] | |
64 | heads = [c for c in headctxs if path in c and c.filenode(path) == fnode] |
|
63 | heads = [c for c in headctxs if path in c and c.filenode(path) == fnode] | |
65 | if heads: |
|
64 | if heads: | |
66 | headlist = ', '.join([short(c.node()) for c in heads]) |
|
65 | headlist = ', '.join([short(c.node()) for c in heads]) | |
67 | raise util.Abort(_('cannot censor file in heads (%s)') % headlist, |
|
66 | raise util.Abort(_('cannot censor file in heads (%s)') % headlist, | |
68 | hint=_('clean/delete and commit first')) |
|
67 | hint=_('clean/delete and commit first')) | |
69 |
|
68 | |||
70 | wctx = repo[None] |
|
69 | wctx = repo[None] | |
71 | wp = wctx.parents() |
|
70 | wp = wctx.parents() | |
72 | if ctx.node() in [p.node() for p in wp]: |
|
71 | if ctx.node() in [p.node() for p in wp]: | |
73 | raise util.Abort(_('cannot censor working directory'), |
|
72 | raise util.Abort(_('cannot censor working directory'), | |
74 | hint=_('clean/delete/update first')) |
|
73 | hint=_('clean/delete/update first')) | |
75 |
|
74 | |||
76 | flogv = flog.version & 0xFFFF |
|
75 | flogv = flog.version & 0xFFFF | |
77 | if flogv != revlog.REVLOGNG: |
|
76 | if flogv != revlog.REVLOGNG: | |
78 | raise util.Abort( |
|
77 | raise util.Abort( | |
79 | _('censor does not support revlog version %d') % (flogv,)) |
|
78 | _('censor does not support revlog version %d') % (flogv,)) | |
80 |
|
79 | |||
81 | tombstone = filelog.packmeta({"censored": tombstone}, "") |
|
80 | tombstone = filelog.packmeta({"censored": tombstone}, "") | |
82 |
|
81 | |||
83 | crev = fctx.filerev() |
|
82 | crev = fctx.filerev() | |
84 |
|
83 | |||
85 | if len(tombstone) > flog.rawsize(crev): |
|
84 | if len(tombstone) > flog.rawsize(crev): | |
86 | raise util.Abort(_( |
|
85 | raise util.Abort(_( | |
87 | 'censor tombstone must be no longer than censored data')) |
|
86 | 'censor tombstone must be no longer than censored data')) | |
88 |
|
87 | |||
89 | # Using two files instead of one makes it easy to rewrite entry-by-entry |
|
88 | # Using two files instead of one makes it easy to rewrite entry-by-entry | |
90 | idxread = repo.svfs(flog.indexfile, 'r') |
|
89 | idxread = repo.svfs(flog.indexfile, 'r') | |
91 | idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True) |
|
90 | idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True) | |
92 | if flog.version & revlog.REVLOGNGINLINEDATA: |
|
91 | if flog.version & revlog.REVLOGNGINLINEDATA: | |
93 | dataread, datawrite = idxread, idxwrite |
|
92 | dataread, datawrite = idxread, idxwrite | |
94 | else: |
|
93 | else: | |
95 | dataread = repo.svfs(flog.datafile, 'r') |
|
94 | dataread = repo.svfs(flog.datafile, 'r') | |
96 | datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True) |
|
95 | datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True) | |
97 |
|
96 | |||
98 | # Copy all revlog data up to the entry to be censored. |
|
97 | # Copy all revlog data up to the entry to be censored. | |
99 | rio = revlog.revlogio() |
|
98 | rio = revlog.revlogio() | |
100 | offset = flog.start(crev) |
|
99 | offset = flog.start(crev) | |
101 |
|
100 | |||
102 | for chunk in util.filechunkiter(idxread, limit=crev * rio.size): |
|
101 | for chunk in util.filechunkiter(idxread, limit=crev * rio.size): | |
103 | idxwrite.write(chunk) |
|
102 | idxwrite.write(chunk) | |
104 | for chunk in util.filechunkiter(dataread, limit=offset): |
|
103 | for chunk in util.filechunkiter(dataread, limit=offset): | |
105 | datawrite.write(chunk) |
|
104 | datawrite.write(chunk) | |
106 |
|
105 | |||
107 | def rewriteindex(r, newoffs, newdata=None): |
|
106 | def rewriteindex(r, newoffs, newdata=None): | |
108 | """Rewrite the index entry with a new data offset and optional new data. |
|
107 | """Rewrite the index entry with a new data offset and optional new data. | |
109 |
|
108 | |||
110 | The newdata argument, if given, is a tuple of three positive integers: |
|
109 | The newdata argument, if given, is a tuple of three positive integers: | |
111 | (new compressed, new uncompressed, added flag bits). |
|
110 | (new compressed, new uncompressed, added flag bits). | |
112 | """ |
|
111 | """ | |
113 | offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r] |
|
112 | offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r] | |
114 | flags = revlog.gettype(offlags) |
|
113 | flags = revlog.gettype(offlags) | |
115 | if newdata: |
|
114 | if newdata: | |
116 | comp, uncomp, nflags = newdata |
|
115 | comp, uncomp, nflags = newdata | |
117 | flags |= nflags |
|
116 | flags |= nflags | |
118 | offlags = revlog.offset_type(newoffs, flags) |
|
117 | offlags = revlog.offset_type(newoffs, flags) | |
119 | e = (offlags, comp, uncomp, r, link, p1, p2, nodeid) |
|
118 | e = (offlags, comp, uncomp, r, link, p1, p2, nodeid) | |
120 | idxwrite.write(rio.packentry(e, None, flog.version, r)) |
|
119 | idxwrite.write(rio.packentry(e, None, flog.version, r)) | |
121 | idxread.seek(rio.size, 1) |
|
120 | idxread.seek(rio.size, 1) | |
122 |
|
121 | |||
123 | def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS): |
|
122 | def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS): | |
124 | """Write the given full text to the filelog with the given data offset. |
|
123 | """Write the given full text to the filelog with the given data offset. | |
125 |
|
124 | |||
126 | Returns: |
|
125 | Returns: | |
127 | The integer number of data bytes written, for tracking data offsets. |
|
126 | The integer number of data bytes written, for tracking data offsets. | |
128 | """ |
|
127 | """ | |
129 | flag, compdata = flog.compress(data) |
|
128 | flag, compdata = flog.compress(data) | |
130 | newcomp = len(flag) + len(compdata) |
|
129 | newcomp = len(flag) + len(compdata) | |
131 | rewriteindex(r, offs, (newcomp, len(data), nflags)) |
|
130 | rewriteindex(r, offs, (newcomp, len(data), nflags)) | |
132 | datawrite.write(flag) |
|
131 | datawrite.write(flag) | |
133 | datawrite.write(compdata) |
|
132 | datawrite.write(compdata) | |
134 | dataread.seek(flog.length(r), 1) |
|
133 | dataread.seek(flog.length(r), 1) | |
135 | return newcomp |
|
134 | return newcomp | |
136 |
|
135 | |||
137 | # Rewrite censored revlog entry with (padded) tombstone data. |
|
136 | # Rewrite censored revlog entry with (padded) tombstone data. | |
138 | pad = ' ' * (flog.rawsize(crev) - len(tombstone)) |
|
137 | pad = ' ' * (flog.rawsize(crev) - len(tombstone)) | |
139 | offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED) |
|
138 | offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED) | |
140 |
|
139 | |||
141 | # Rewrite all following filelog revisions fixing up offsets and deltas. |
|
140 | # Rewrite all following filelog revisions fixing up offsets and deltas. | |
142 | for srev in xrange(crev + 1, len(flog)): |
|
141 | for srev in xrange(crev + 1, len(flog)): | |
143 | if crev in flog.parentrevs(srev): |
|
142 | if crev in flog.parentrevs(srev): | |
144 | # Immediate children of censored node must be re-added as fulltext. |
|
143 | # Immediate children of censored node must be re-added as fulltext. | |
145 | try: |
|
144 | try: | |
146 | revdata = flog.revision(srev) |
|
145 | revdata = flog.revision(srev) | |
147 | except error.CensoredNodeError, e: |
|
146 | except error.CensoredNodeError, e: | |
148 | revdata = e.tombstone |
|
147 | revdata = e.tombstone | |
149 | dlen = rewrite(srev, offset, revdata) |
|
148 | dlen = rewrite(srev, offset, revdata) | |
150 | else: |
|
149 | else: | |
151 | # Copy any other revision data verbatim after fixing up the offset. |
|
150 | # Copy any other revision data verbatim after fixing up the offset. | |
152 | rewriteindex(srev, offset) |
|
151 | rewriteindex(srev, offset) | |
153 | dlen = flog.length(srev) |
|
152 | dlen = flog.length(srev) | |
154 | for chunk in util.filechunkiter(dataread, limit=dlen): |
|
153 | for chunk in util.filechunkiter(dataread, limit=dlen): | |
155 | datawrite.write(chunk) |
|
154 | datawrite.write(chunk) | |
156 | offset += dlen |
|
155 | offset += dlen | |
157 |
|
156 | |||
158 | idxread.close() |
|
157 | idxread.close() | |
159 | idxwrite.close() |
|
158 | idxwrite.close() | |
160 | if dataread is not idxread: |
|
159 | if dataread is not idxread: | |
161 | dataread.close() |
|
160 | dataread.close() | |
162 | datawrite.close() |
|
161 | datawrite.close() |
General Comments 0
You need to be logged in to leave comments.
Login now