upstream/mercurial-mirror Commit - r38460:e219e355

1

2

#

2

#

3

# This extension enables removal of file content at a given revision,

3

# This extension enables removal of file content at a given revision,

4

# rewriting the data/metadata of successive revisions to preserve revision log

4

# rewriting the data/metadata of successive revisions to preserve revision log

5

# integrity.

5

# integrity.

6

7

"""erase file content at a given revision

7

"""erase file content at a given revision

8

9

The censor command instructs Mercurial to erase all content of a file at a given

9

The censor command instructs Mercurial to erase all content of a file at a given

10

revision *without updating the changeset hash.* This allows existing history to

10

revision *without updating the changeset hash.* This allows existing history to

11

remain valid while preventing future clones/pulls from receiving the erased

11

remain valid while preventing future clones/pulls from receiving the erased

12

data.

12

data.

13

14

Typical uses for censor are due to security or legal requirements, including::

14

Typical uses for censor are due to security or legal requirements, including::

15

16

* Passwords, private keys, cryptographic material

16

* Passwords, private keys, cryptographic material

17

* Licensed data/code/libraries for which the license has expired

17

* Licensed data/code/libraries for which the license has expired

18

* Personally Identifiable Information or other private data

18

* Personally Identifiable Information or other private data

19

20

Censored nodes can interrupt mercurial's typical operation whenever the excised

20

Censored nodes can interrupt mercurial's typical operation whenever the excised

21

data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,

21

data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,

22

simply fail when asked to produce censored data. Others, like ``hg verify`` and

22

simply fail when asked to produce censored data. Others, like ``hg verify`` and

23

``hg update``, must be capable of tolerating censored data to continue to

23

``hg update``, must be capable of tolerating censored data to continue to

24

function in a meaningful way. Such commands only tolerate censored file

24

function in a meaningful way. Such commands only tolerate censored file

25

revisions if they are allowed by the "censor.policy=ignore" config option.

25

revisions if they are allowed by the "censor.policy=ignore" config option.

26

"""

26

"""

27

28

from __future__ import absolute_import

28

from __future__ import absolute_import

29

30

from mercurial.i18n import _

30

from mercurial.i18n import _

31

from mercurial.node import short

31

from mercurial.node import short

32

33

from mercurial import (

33

from mercurial import (

34

error,

34

error,

35

lock as lockmod,

36

registrar,

35

registrar,

37

revlog,

36

revlog,

38

scmutil,

37

scmutil,

39

util,

38

util,

40

)

39

)

41

40

42

cmdtable = {}

41

cmdtable = {}

43

command = registrar.command(cmdtable)

42

command = registrar.command(cmdtable)

44

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for

43

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for

45

# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should

44

# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should

46

# be specifying the version(s) of Mercurial they are tested with, or

45

# be specifying the version(s) of Mercurial they are tested with, or

47

# leave the attribute unspecified.

46

# leave the attribute unspecified.

48

testedwith = 'ships-with-hg-core'

47

testedwith = 'ships-with-hg-core'

49

48

50

@command('censor',

49

@command('censor',

51

[('r', 'rev', '', _('censor file from specified revision'), _('REV')),

50

[('r', 'rev', '', _('censor file from specified revision'), _('REV')),

52

('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],

51

('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],

53

_('-r REV [-t TEXT] [FILE]'))

52

_('-r REV [-t TEXT] [FILE]'))

54

def censor(ui, repo, path, rev='', tombstone='', **opts):

53

def censor(ui, repo, path, rev='', tombstone='', **opts):

55

wlock = lock = None

54

with repo.wlock(), repo.lock():

56

try:

57

wlock = repo.wlock()

58

lock = repo.lock()

59

return _docensor(ui, repo, path, rev, tombstone, **opts)

55

return _docensor(ui, repo, path, rev, tombstone, **opts)

60

finally:

61

lockmod.release(lock, wlock)

62

56

63

def _docensor(ui, repo, path, rev='', tombstone='', **opts):

57

def _docensor(ui, repo, path, rev='', tombstone='', **opts):

64

if not path:

58

if not path:

65

raise error.Abort(_('must specify file path to censor'))

59

raise error.Abort(_('must specify file path to censor'))

66

if not rev:

60

if not rev:

67

raise error.Abort(_('must specify revision to censor'))

61

raise error.Abort(_('must specify revision to censor'))

68

62

69

wctx = repo[None]

63

wctx = repo[None]

70

64

71

m = scmutil.match(wctx, (path,))

65

m = scmutil.match(wctx, (path,))

72

if m.anypats() or len(m.files()) != 1:

66

if m.anypats() or len(m.files()) != 1:

73

raise error.Abort(_('can only specify an explicit filename'))

67

raise error.Abort(_('can only specify an explicit filename'))

74

path = m.files()[0]

68

path = m.files()[0]

75

flog = repo.file(path)

69

flog = repo.file(path)

76

if not len(flog):

70

if not len(flog):

77

raise error.Abort(_('cannot censor file with no history'))

71

raise error.Abort(_('cannot censor file with no history'))

78

72

79

rev = scmutil.revsingle(repo, rev, rev).rev()

73

rev = scmutil.revsingle(repo, rev, rev).rev()

80

try:

74

try:

81

ctx = repo[rev]

75

ctx = repo[rev]

82

except KeyError:

76

except KeyError:

83

raise error.Abort(_('invalid revision identifier %s') % rev)

77

raise error.Abort(_('invalid revision identifier %s') % rev)

84

78

85

try:

79

try:

86

fctx = ctx.filectx(path)

80

fctx = ctx.filectx(path)

87

except error.LookupError:

81

except error.LookupError:

88

raise error.Abort(_('file does not exist at revision %s') % rev)

82

raise error.Abort(_('file does not exist at revision %s') % rev)

89

83

90

fnode = fctx.filenode()

84

fnode = fctx.filenode()

91

headctxs = [repo[c] for c in repo.heads()]

85

headctxs = [repo[c] for c in repo.heads()]

92

heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]

86

heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]

93

if heads:

87

if heads:

94

headlist = ', '.join([short(c.node()) for c in heads])

88

headlist = ', '.join([short(c.node()) for c in heads])

95

raise error.Abort(_('cannot censor file in heads (%s)') % headlist,

89

raise error.Abort(_('cannot censor file in heads (%s)') % headlist,

96

hint=_('clean/delete and commit first'))

90

hint=_('clean/delete and commit first'))

97

91

98

wp = wctx.parents()

92

wp = wctx.parents()

99

if ctx.node() in [p.node() for p in wp]:

93

if ctx.node() in [p.node() for p in wp]:

100

raise error.Abort(_('cannot censor working directory'),

94

raise error.Abort(_('cannot censor working directory'),

101

hint=_('clean/delete/update first'))

95

hint=_('clean/delete/update first'))

102

96

103

flogv = flog.version & 0xFFFF

97

flogv = flog.version & 0xFFFF

104

if flogv != revlog.REVLOGV1:

98

if flogv != revlog.REVLOGV1:

105

raise error.Abort(

99

raise error.Abort(

106

_('censor does not support revlog version %d') % (flogv,))

100

_('censor does not support revlog version %d') % (flogv,))

107

101

108

tombstone = revlog.packmeta({"censored": tombstone}, "")

102

tombstone = revlog.packmeta({"censored": tombstone}, "")

109

103

110

crev = fctx.filerev()

104

crev = fctx.filerev()

111

105

112

if len(tombstone) > flog.rawsize(crev):

106

if len(tombstone) > flog.rawsize(crev):

113

raise error.Abort(_(

107

raise error.Abort(_(

114

'censor tombstone must be no longer than censored data'))

108

'censor tombstone must be no longer than censored data'))

115

109

116

# Using two files instead of one makes it easy to rewrite entry-by-entry

110

# Using two files instead of one makes it easy to rewrite entry-by-entry

117

idxread = repo.svfs(flog.indexfile, 'r')

111

idxread = repo.svfs(flog.indexfile, 'r')

118

idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)

112

idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)

119

if flog.version & revlog.FLAG_INLINE_DATA:

113

if flog.version & revlog.FLAG_INLINE_DATA:

120

dataread, datawrite = idxread, idxwrite

114

dataread, datawrite = idxread, idxwrite

121

else:

115

else:

122

dataread = repo.svfs(flog.datafile, 'r')

116

dataread = repo.svfs(flog.datafile, 'r')

123

datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

117

datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

124

118

125

# Copy all revlog data up to the entry to be censored.

119

# Copy all revlog data up to the entry to be censored.

126

rio = revlog.revlogio()

120

rio = revlog.revlogio()

127

offset = flog.start(crev)

121

offset = flog.start(crev)

128

122

129

for chunk in util.filechunkiter(idxread, limit=crev * rio.size):

123

for chunk in util.filechunkiter(idxread, limit=crev * rio.size):

130

idxwrite.write(chunk)

124

idxwrite.write(chunk)

131

for chunk in util.filechunkiter(dataread, limit=offset):

125

for chunk in util.filechunkiter(dataread, limit=offset):

132

datawrite.write(chunk)

126

datawrite.write(chunk)

133

127

134

def rewriteindex(r, newoffs, newdata=None):

128

def rewriteindex(r, newoffs, newdata=None):

135

"""Rewrite the index entry with a new data offset and optional new data.

129

"""Rewrite the index entry with a new data offset and optional new data.

136

130

137

The newdata argument, if given, is a tuple of three positive integers:

131

The newdata argument, if given, is a tuple of three positive integers:

138

(new compressed, new uncompressed, added flag bits).

132

(new compressed, new uncompressed, added flag bits).

139

"""

133

"""

140

offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]

134

offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]

141

flags = revlog.gettype(offlags)

135

flags = revlog.gettype(offlags)

142

if newdata:

136

if newdata:

143

comp, uncomp, nflags = newdata

137

comp, uncomp, nflags = newdata

144

flags |= nflags

138

flags |= nflags

145

offlags = revlog.offset_type(newoffs, flags)

139

offlags = revlog.offset_type(newoffs, flags)

146

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

140

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

147

idxwrite.write(rio.packentry(e, None, flog.version, r))

141

idxwrite.write(rio.packentry(e, None, flog.version, r))

148

idxread.seek(rio.size, 1)

142

idxread.seek(rio.size, 1)

149

143

150

def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):

144

def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):

151

"""Write the given full text to the filelog with the given data offset.

145

"""Write the given full text to the filelog with the given data offset.

152

146

153

Returns:

147

Returns:

154

The integer number of data bytes written, for tracking data offsets.

148

The integer number of data bytes written, for tracking data offsets.

155

"""

149

"""

156

flag, compdata = flog.compress(data)

150

flag, compdata = flog.compress(data)

157

newcomp = len(flag) + len(compdata)

151

newcomp = len(flag) + len(compdata)

158

rewriteindex(r, offs, (newcomp, len(data), nflags))

152

rewriteindex(r, offs, (newcomp, len(data), nflags))

159

datawrite.write(flag)

153

datawrite.write(flag)

160

datawrite.write(compdata)

154

datawrite.write(compdata)

161

dataread.seek(flog.length(r), 1)

155

dataread.seek(flog.length(r), 1)

162

return newcomp

156

return newcomp

163

157

164

# Rewrite censored revlog entry with (padded) tombstone data.

158

# Rewrite censored revlog entry with (padded) tombstone data.

165

pad = ' ' * (flog.rawsize(crev) - len(tombstone))

159

pad = ' ' * (flog.rawsize(crev) - len(tombstone))

166

offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

160

offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

167

161

168

# Rewrite all following filelog revisions fixing up offsets and deltas.

162

# Rewrite all following filelog revisions fixing up offsets and deltas.

169

for srev in xrange(crev + 1, len(flog)):

163

for srev in xrange(crev + 1, len(flog)):

170

if crev in flog.parentrevs(srev):

164

if crev in flog.parentrevs(srev):

171

# Immediate children of censored node must be re-added as fulltext.

165

# Immediate children of censored node must be re-added as fulltext.

172

try:

166

try:

173

revdata = flog.revision(srev)

167

revdata = flog.revision(srev)

174

except error.CensoredNodeError as e:

168

except error.CensoredNodeError as e:

175

revdata = e.tombstone

169

revdata = e.tombstone

176

dlen = rewrite(srev, offset, revdata)

170

dlen = rewrite(srev, offset, revdata)

177

else:

171

else:

178

# Copy any other revision data verbatim after fixing up the offset.

172

# Copy any other revision data verbatim after fixing up the offset.

179

rewriteindex(srev, offset)

173

rewriteindex(srev, offset)

180

dlen = flog.length(srev)

174

dlen = flog.length(srev)

181

for chunk in util.filechunkiter(dataread, limit=dlen):

175

for chunk in util.filechunkiter(dataread, limit=dlen):

182

datawrite.write(chunk)

176

datawrite.write(chunk)

183

offset += dlen

177

offset += dlen

184

178

185

idxread.close()

179

idxread.close()

186

idxwrite.close()

180

idxwrite.close()

187

if dataread is not idxread:

181

if dataread is not idxread:

188

dataread.close()

182

dataread.close()

189

datawrite.close()

183

datawrite.close()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
             #
             # This extension enables removal of file content at a given revision,
             # rewriting the data/metadata of successive revisions to preserve revision log
             # integrity.
             """erase file content at a given revision
             The censor command instructs Mercurial to erase all content of a file at a given
             revision *without updating the changeset hash.* This allows existing history to
             remain valid while preventing future clones/pulls from receiving the erased
             data.
             Typical uses for censor are due to security or legal requirements, including::
              * Passwords, private keys, cryptographic material
              * Licensed data/code/libraries for which the license has expired
              * Personally Identifiable Information or other private data
             Censored nodes can interrupt mercurial's typical operation whenever the excised
             data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
             simply fail when asked to produce censored data. Others, like ``hg verify`` and
             ``hg update``, must be capable of tolerating censored data to continue to
             function in a meaningful way. Such commands only tolerate censored file
             revisions if they are allowed by the "censor.policy=ignore" config option.
             """
             from __future__ import absolute_import
             from mercurial.i18n import _
             from mercurial.node import short
             from mercurial import (
                 error,
-                lock as lockmod,
                 registrar,
                 revlog,
                 scmutil,
                 util,
             )
             cmdtable = {}
             command = registrar.command(cmdtable)
             # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
             # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
             # be specifying the version(s) of Mercurial they are tested with, or
             # leave the attribute unspecified.
             testedwith = 'ships-with-hg-core'
             @command('censor',
                 [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
                  ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
                 _('-r REV [-t TEXT] [FILE]'))
             def censor(ui, repo, path, rev='', tombstone='', **opts):
-                wlock = lock = None
+                with repo.wlock(), repo.lock():
-                try:
-                    wlock = repo.wlock()
-                    lock = repo.lock()
                     return _docensor(ui, repo, path, rev, tombstone, **opts)
-                finally:
-                    lockmod.release(lock, wlock)
             def _docensor(ui, repo, path, rev='', tombstone='', **opts):
                 """Censor one file revision by rewriting its filelog.
                 Validates the request, then writes a new copy of the filelog index (and
                 of the data file when the revlog is not inline) in which the censored
                 revision's data is replaced by a padded tombstone and every later entry
                 is re-emitted with a corrected data offset.
                 Args:
                     ui: not used in this function body.
                     repo: repository object providing the filelog, heads and store vfs.
                     path: file name to censor; must resolve to one explicit file.
                     rev: revision identifier, resolved with scmutil.revsingle().
                     tombstone: replacement text, packed under the "censored" meta key.
                     **opts: accepted but not used in this function body.
                 Raises:
                     error.Abort: if path or rev is empty, path is a pattern or matches
                         more than one file, the file has no history or does not exist
                         at rev, the file node is still present in a head or the
                         changeset is a working directory parent, the revlog version is
                         not REVLOGV1, or the packed tombstone is longer than the
                         censored data.
                 """
                 if not path:
                     raise error.Abort(_('must specify file path to censor'))
                 if not rev:
                     raise error.Abort(_('must specify revision to censor'))
                 # repo[None]: presumably the working directory context (it is used below
                 # for the "cannot censor working directory" check) -- TODO confirm.
                 wctx = repo[None]
                 # Only a single, literal file name is accepted: no patterns, no lists.
                 m = scmutil.match(wctx, (path,))
                 if m.anypats() or len(m.files()) != 1:
                     raise error.Abort(_('can only specify an explicit filename'))
                 path = m.files()[0]
                 flog = repo.file(path)
                 if not len(flog):
                     raise error.Abort(_('cannot censor file with no history'))
                 # Resolve the user-supplied identifier to a revision number.
                 rev = scmutil.revsingle(repo, rev, rev).rev()
                 try:
                     ctx = repo[rev]
                 except KeyError:
                     raise error.Abort(_('invalid revision identifier %s') % rev)
                 try:
                     fctx = ctx.filectx(path)
                 except error.LookupError:
                     raise error.Abort(_('file does not exist at revision %s') % rev)
                 # Refuse to censor while any repository head still carries this exact
                 # file node.
                 fnode = fctx.filenode()
                 headctxs = [repo[c] for c in repo.heads()]
                 heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
                 if heads:
                     headlist = ', '.join([short(c.node()) for c in heads])
                     raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
                         hint=_('clean/delete and commit first'))
                 # Refuse as well when the target changeset is a parent of wctx.
                 wp = wctx.parents()
                 if ctx.node() in [p.node() for p in wp]:
                     raise error.Abort(_('cannot censor working directory'),
                         hint=_('clean/delete/update first'))
                 # Only the low 16 bits of flog.version are compared against REVLOGV1;
                 # the FLAG_INLINE_DATA bit of the same field is tested separately below.
                 flogv = flog.version & 0xFFFF
                 if flogv != revlog.REVLOGV1:
                     raise error.Abort(
                         _('censor does not support revlog version %d') % (flogv,))
                 # From here on "tombstone" is the packed form: metadata with a
                 # "censored" key and empty text.
                 tombstone = revlog.packmeta({"censored": tombstone}, "")
                 # crev is the revision number of the censored entry within the filelog.
                 crev = fctx.filerev()
                 # The packed tombstone is later padded up to the raw size of the
                 # censored revision, so it must not already exceed that size.
                 if len(tombstone) > flog.rawsize(crev):
                     raise error.Abort(_(
                         'censor tombstone must be no longer than censored data'))
                 # Using two files instead of one makes it easy to rewrite entry-by-entry
                 # NOTE(review): the write handles are opened with atomictemp=True --
                 # presumably the original file is only replaced on close(); confirm.
                 idxread = repo.svfs(flog.indexfile, 'r')
                 idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
                 if flog.version & revlog.FLAG_INLINE_DATA:
                     # Inline revlog: the index handles double as the data handles.
                     dataread, datawrite = idxread, idxwrite
                 else:
                     dataread = repo.svfs(flog.datafile, 'r')
                     datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
                 # Copy all revlog data up to the entry to be censored.
                 rio = revlog.revlogio()
                 # offset tracks the data offset at which the next entry will be written.
                 offset = flog.start(crev)
                 for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
                     idxwrite.write(chunk)
                 for chunk in util.filechunkiter(dataread, limit=offset):
                     datawrite.write(chunk)
                 def rewriteindex(r, newoffs, newdata=None):
                     """Rewrite the index entry with a new data offset and optional new data.
                     The newdata argument, if given, is a tuple of three positive integers:
                     (new compressed, new uncompressed, added flag bits).
                     The packed entry is written to idxwrite and idxread is advanced past
                     the corresponding original entry.
                     """
                     offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
                     flags = revlog.gettype(offlags)
                     if newdata:
                         comp, uncomp, nflags = newdata
                         flags |= nflags
                     offlags = revlog.offset_type(newoffs, flags)
                     # NOTE(review): the unpacked "base" is never used; the fourth field
                     # of the new entry is always r, including for entries whose data is
                     # copied verbatim -- confirm this is intended.
                     e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
                     idxwrite.write(rio.packentry(e, None, flog.version, r))
                     # Skip the original entry in the read handle (whence=1: relative to
                     # the current position).
                     idxread.seek(rio.size, 1)
                 def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
                     """Write the given full text to the filelog with the given data offset.
                     The text is compressed with flog.compress(); the index entry is
                     rewritten first, then the compression flag and payload are written,
                     and dataread is advanced past the original data of revision r.
                     Returns:
                         The integer number of data bytes written, for tracking data offsets.
                     """
                     flag, compdata = flog.compress(data)
                     newcomp = len(flag) + len(compdata)
                     rewriteindex(r, offs, (newcomp, len(data), nflags))
                     datawrite.write(flag)
                     datawrite.write(compdata)
                     # Skip the original stored data of r in the read handle.
                     dataread.seek(flog.length(r), 1)
                     return newcomp
                 # Rewrite censored revlog entry with (padded) tombstone data.
                 # The padding makes tombstone + pad exactly flog.rawsize(crev) long.
                 pad = ' ' * (flog.rawsize(crev) - len(tombstone))
                 offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
                 # Rewrite all following filelog revisions fixing up offsets and deltas.
                 for srev in xrange(crev + 1, len(flog)):
                     if crev in flog.parentrevs(srev):
                         # Immediate children of censored node must be re-added as fulltext.
                         try:
                             revdata = flog.revision(srev)
                         except error.CensoredNodeError as e:
                             # Reading the child raised CensoredNodeError: store the
                             # tombstone carried by the exception as its data.
                             revdata = e.tombstone
                         dlen = rewrite(srev, offset, revdata)
                     else:
                         # Copy any other revision data verbatim after fixing up the offset.
                         rewriteindex(srev, offset)
                         dlen = flog.length(srev)
                         for chunk in util.filechunkiter(dataread, limit=dlen):
                             datawrite.write(chunk)
                     offset += dlen
                 # NOTE(review): the handles are closed only on this success path; there
                 # is no try/finally, so an exception above leaves them unclosed.
                 idxread.close()
                 idxwrite.close()
                 # For an inline revlog the data handles are the index handles, which
                 # were already closed above.
                 if dataread is not idxread:
                     dataread.close()
                     datawrite.close()