upstream/mercurial-mirror Commit - r27290:525d9b3f

1

2

#

2

#

3

# This extension enables removal of file content at a given revision,

3

# This extension enables removal of file content at a given revision,

4

# rewriting the data/metadata of successive revisions to preserve revision log

4

# rewriting the data/metadata of successive revisions to preserve revision log

5

# integrity.

5

# integrity.

6

7

"""erase file content at a given revision

7

"""erase file content at a given revision

8

9

The censor command instructs Mercurial to erase all content of a file at a given

9

The censor command instructs Mercurial to erase all content of a file at a given

10

revision *without updating the changeset hash.* This allows existing history to

10

revision *without updating the changeset hash.* This allows existing history to

11

remain valid while preventing future clones/pulls from receiving the erased

11

remain valid while preventing future clones/pulls from receiving the erased

12

data.

12

data.

13

14

Typical uses for censor are due to security or legal requirements, including::

14

Typical uses for censor are due to security or legal requirements, including::

15

16

* Passwords, private keys, cryptographic material

16

* Passwords, private keys, cryptographic material

17

* Licensed data/code/libraries for which the license has expired

17

* Licensed data/code/libraries for which the license has expired

18

* Personally Identifiable Information or other private data

18

* Personally Identifiable Information or other private data

19

20

Censored nodes can interrupt mercurial's typical operation whenever the excised

20

Censored nodes can interrupt mercurial's typical operation whenever the excised

21

data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,

21

data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,

22

simply fail when asked to produce censored data. Others, like ``hg verify`` and

22

simply fail when asked to produce censored data. Others, like ``hg verify`` and

23

``hg update``, must be capable of tolerating censored data to continue to

23

``hg update``, must be capable of tolerating censored data to continue to

24

function in a meaningful way. Such commands only tolerate censored file

24

function in a meaningful way. Such commands only tolerate censored file

25

revisions if they are allowed by the "censor.policy=ignore" config option.

25

revisions if they are allowed by the "censor.policy=ignore" config option.

26

"""

26

"""

27

28

from mercurial.node import short

28

from mercurial.node import short

29

from mercurial import cmdutil, error, filelog, revlog, scmutil, util

29

from mercurial import cmdutil, error, filelog, revlog, scmutil, util

30

from mercurial.i18n import _

30

from mercurial.i18n import _

31

from mercurial import lock as lockmod

31

32

cmdtable = {}

33

cmdtable = {}

33

command = cmdutil.command(cmdtable)

34

command = cmdutil.command(cmdtable)

34

# Note for extension authors: ONLY specify testedwith = 'internal' for

35

# Note for extension authors: ONLY specify testedwith = 'internal' for

35

# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should

36

# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should

36

# be specifying the version(s) of Mercurial they are tested with, or

37

# be specifying the version(s) of Mercurial they are tested with, or

37

# leave the attribute unspecified.

38

# leave the attribute unspecified.

38

testedwith = 'internal'

39

testedwith = 'internal'

39

40

@command('censor',

41

@command('censor',

41

[('r', 'rev', '', _('censor file from specified revision'), _('REV')),

42

[('r', 'rev', '', _('censor file from specified revision'), _('REV')),

42

('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],

43

('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],

43

_('-r REV [-t TEXT] [FILE]'))

44

_('-r REV [-t TEXT] [FILE]'))

44

def censor(ui, repo, path, rev='', tombstone='', **opts):

45

def censor(ui, repo, path, rev='', tombstone='', **opts):

46

wlock = lock = None

47

try:

48

wlock = repo.wlock()

49

lock = repo.lock()

50

return _docensor(ui, repo, path, rev, tombstone, **opts)

51

finally:

52

lockmod.release(lock, wlock)

53

54

def _docensor(ui, repo, path, rev='', tombstone='', **opts):

45

if not path:

55

if not path:

46

raise error.Abort(_('must specify file path to censor'))

56

raise error.Abort(_('must specify file path to censor'))

47

if not rev:

57

if not rev:

48

raise error.Abort(_('must specify revision to censor'))

58

raise error.Abort(_('must specify revision to censor'))

49

59

50

wctx = repo[None]

60

wctx = repo[None]

51

61

52

m = scmutil.match(wctx, (path,))

62

m = scmutil.match(wctx, (path,))

53

if m.anypats() or len(m.files()) != 1:

63

if m.anypats() or len(m.files()) != 1:

54

raise error.Abort(_('can only specify an explicit filename'))

64

raise error.Abort(_('can only specify an explicit filename'))

55

path = m.files()[0]

65

path = m.files()[0]

56

flog = repo.file(path)

66

flog = repo.file(path)

57

if not len(flog):

67

if not len(flog):

58

raise error.Abort(_('cannot censor file with no history'))

68

raise error.Abort(_('cannot censor file with no history'))

59

69

60

rev = scmutil.revsingle(repo, rev, rev).rev()

70

rev = scmutil.revsingle(repo, rev, rev).rev()

61

try:

71

try:

62

ctx = repo[rev]

72

ctx = repo[rev]

63

except KeyError:

73

except KeyError:

64

raise error.Abort(_('invalid revision identifier %s') % rev)

74

raise error.Abort(_('invalid revision identifier %s') % rev)

65

75

66

try:

76

try:

67

fctx = ctx.filectx(path)

77

fctx = ctx.filectx(path)

68

except error.LookupError:

78

except error.LookupError:

69

raise error.Abort(_('file does not exist at revision %s') % rev)

79

raise error.Abort(_('file does not exist at revision %s') % rev)

70

80

71

fnode = fctx.filenode()

81

fnode = fctx.filenode()

72

headctxs = [repo[c] for c in repo.heads()]

82

headctxs = [repo[c] for c in repo.heads()]

73

heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]

83

heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]

74

if heads:

84

if heads:

75

headlist = ', '.join([short(c.node()) for c in heads])

85

headlist = ', '.join([short(c.node()) for c in heads])

76

raise error.Abort(_('cannot censor file in heads (%s)') % headlist,

86

raise error.Abort(_('cannot censor file in heads (%s)') % headlist,

77

hint=_('clean/delete and commit first'))

87

hint=_('clean/delete and commit first'))

78

88

79

wp = wctx.parents()

89

wp = wctx.parents()

80

if ctx.node() in [p.node() for p in wp]:

90

if ctx.node() in [p.node() for p in wp]:

81

raise error.Abort(_('cannot censor working directory'),

91

raise error.Abort(_('cannot censor working directory'),

82

hint=_('clean/delete/update first'))

92

hint=_('clean/delete/update first'))

83

93

84

flogv = flog.version & 0xFFFF

94

flogv = flog.version & 0xFFFF

85

if flogv != revlog.REVLOGNG:

95

if flogv != revlog.REVLOGNG:

86

raise error.Abort(

96

raise error.Abort(

87

_('censor does not support revlog version %d') % (flogv,))

97

_('censor does not support revlog version %d') % (flogv,))

88

98

89

tombstone = filelog.packmeta({"censored": tombstone}, "")

99

tombstone = filelog.packmeta({"censored": tombstone}, "")

90

100

91

crev = fctx.filerev()

101

crev = fctx.filerev()

92

102

93

if len(tombstone) > flog.rawsize(crev):

103

if len(tombstone) > flog.rawsize(crev):

94

raise error.Abort(_(

104

raise error.Abort(_(

95

'censor tombstone must be no longer than censored data'))

105

'censor tombstone must be no longer than censored data'))

96

106

97

# Using two files instead of one makes it easy to rewrite entry-by-entry

107

# Using two files instead of one makes it easy to rewrite entry-by-entry

98

idxread = repo.svfs(flog.indexfile, 'r')

108

idxread = repo.svfs(flog.indexfile, 'r')

99

idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)

109

idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)

100

if flog.version & revlog.REVLOGNGINLINEDATA:

110

if flog.version & revlog.REVLOGNGINLINEDATA:

101

dataread, datawrite = idxread, idxwrite

111

dataread, datawrite = idxread, idxwrite

102

else:

112

else:

103

dataread = repo.svfs(flog.datafile, 'r')

113

dataread = repo.svfs(flog.datafile, 'r')

104

datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

114

datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)

105

115

106

# Copy all revlog data up to the entry to be censored.

116

# Copy all revlog data up to the entry to be censored.

107

rio = revlog.revlogio()

117

rio = revlog.revlogio()

108

offset = flog.start(crev)

118

offset = flog.start(crev)

109

119

110

for chunk in util.filechunkiter(idxread, limit=crev * rio.size):

120

for chunk in util.filechunkiter(idxread, limit=crev * rio.size):

111

idxwrite.write(chunk)

121

idxwrite.write(chunk)

112

for chunk in util.filechunkiter(dataread, limit=offset):

122

for chunk in util.filechunkiter(dataread, limit=offset):

113

datawrite.write(chunk)

123

datawrite.write(chunk)

114

124

115

def rewriteindex(r, newoffs, newdata=None):

125

def rewriteindex(r, newoffs, newdata=None):

116

"""Rewrite the index entry with a new data offset and optional new data.

126

"""Rewrite the index entry with a new data offset and optional new data.

117

127

118

The newdata argument, if given, is a tuple of three positive integers:

128

The newdata argument, if given, is a tuple of three positive integers:

119

(new compressed, new uncompressed, added flag bits).

129

(new compressed, new uncompressed, added flag bits).

120

"""

130

"""

121

offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]

131

offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]

122

flags = revlog.gettype(offlags)

132

flags = revlog.gettype(offlags)

123

if newdata:

133

if newdata:

124

comp, uncomp, nflags = newdata

134

comp, uncomp, nflags = newdata

125

flags |= nflags

135

flags |= nflags

126

offlags = revlog.offset_type(newoffs, flags)

136

offlags = revlog.offset_type(newoffs, flags)

127

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

137

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

128

idxwrite.write(rio.packentry(e, None, flog.version, r))

138

idxwrite.write(rio.packentry(e, None, flog.version, r))

129

idxread.seek(rio.size, 1)

139

idxread.seek(rio.size, 1)

130

140

131

def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):

141

def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):

132

"""Write the given full text to the filelog with the given data offset.

142

"""Write the given full text to the filelog with the given data offset.

133

143

134

Returns:

144

Returns:

135

The integer number of data bytes written, for tracking data offsets.

145

The integer number of data bytes written, for tracking data offsets.

136

"""

146

"""

137

flag, compdata = flog.compress(data)

147

flag, compdata = flog.compress(data)

138

newcomp = len(flag) + len(compdata)

148

newcomp = len(flag) + len(compdata)

139

rewriteindex(r, offs, (newcomp, len(data), nflags))

149

rewriteindex(r, offs, (newcomp, len(data), nflags))

140

datawrite.write(flag)

150

datawrite.write(flag)

141

datawrite.write(compdata)

151

datawrite.write(compdata)

142

dataread.seek(flog.length(r), 1)

152

dataread.seek(flog.length(r), 1)

143

return newcomp

153

return newcomp

144

154

145

# Rewrite censored revlog entry with (padded) tombstone data.

155

# Rewrite censored revlog entry with (padded) tombstone data.

146

pad = ' ' * (flog.rawsize(crev) - len(tombstone))

156

pad = ' ' * (flog.rawsize(crev) - len(tombstone))

147

offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

157

offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)

148

158

149

# Rewrite all following filelog revisions fixing up offsets and deltas.

159

# Rewrite all following filelog revisions fixing up offsets and deltas.

150

for srev in xrange(crev + 1, len(flog)):

160

for srev in xrange(crev + 1, len(flog)):

151

if crev in flog.parentrevs(srev):

161

if crev in flog.parentrevs(srev):

152

# Immediate children of censored node must be re-added as fulltext.

162

# Immediate children of censored node must be re-added as fulltext.

153

try:

163

try:

154

revdata = flog.revision(srev)

164

revdata = flog.revision(srev)

155

except error.CensoredNodeError as e:

165

except error.CensoredNodeError as e:

156

revdata = e.tombstone

166

revdata = e.tombstone

157

dlen = rewrite(srev, offset, revdata)

167

dlen = rewrite(srev, offset, revdata)

158

else:

168

else:

159

# Copy any other revision data verbatim after fixing up the offset.

169

# Copy any other revision data verbatim after fixing up the offset.

160

rewriteindex(srev, offset)

170

rewriteindex(srev, offset)

161

dlen = flog.length(srev)

171

dlen = flog.length(srev)

162

for chunk in util.filechunkiter(dataread, limit=dlen):

172

for chunk in util.filechunkiter(dataread, limit=dlen):

163

datawrite.write(chunk)

173

datawrite.write(chunk)

164

offset += dlen

174

offset += dlen

165

175

166

idxread.close()

176

idxread.close()

167

idxwrite.close()

177

idxwrite.close()

168

if dataread is not idxread:

178

if dataread is not idxread:

169

dataread.close()

179

dataread.close()

170

datawrite.close()

180

datawrite.close()

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # Copyright (C) 2015 - Mike Edgar <adgar@google.com>
             #
             # This extension enables removal of file content at a given revision,
             # rewriting the data/metadata of successive revisions to preserve revision log
             # integrity.
             """erase file content at a given revision
             The censor command instructs Mercurial to erase all content of a file at a given
             revision *without updating the changeset hash.* This allows existing history to
             remain valid while preventing future clones/pulls from receiving the erased
             data.
             Typical uses for censor are due to security or legal requirements, including::
              * Passwords, private keys, cryptographic material
              * Licensed data/code/libraries for which the license has expired
              * Personally Identifiable Information or other private data
             Censored nodes can interrupt Mercurial's typical operation whenever the excised
             data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
             simply fail when asked to produce censored data. Others, like ``hg verify`` and
             ``hg update``, must be capable of tolerating censored data to continue to
             function in a meaningful way. Such commands only tolerate censored file
             revisions if they are allowed by the "censor.policy=ignore" config option.
             """
             from mercurial.node import short
             from mercurial import cmdutil, error, filelog, revlog, scmutil, util
             from mercurial.i18n import _
+            from mercurial import lock as lockmod
             cmdtable = {}
             command = cmdutil.command(cmdtable)
             # Note for extension authors: ONLY specify testedwith = 'internal' for
             # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
             # be specifying the version(s) of Mercurial they are tested with, or
             # leave the attribute unspecified.
             testedwith = 'internal'
             # Command entry point for "censor". It only handles locking: it acquires
             # repo.wlock() and then repo.lock(), delegates all validation and
             # rewriting to _docensor(), and releases both locks afterwards.
             @command('censor',
                 [('r', 'rev', '', _('censor file from specified revision'), _('REV')),
                  ('t', 'tombstone', '', _('replacement tombstone data'), _('TEXT'))],
                 _('-r REV [-t TEXT] [FILE]'))
             def censor(ui, repo, path, rev='', tombstone='', **opts):
                 # Pre-bind both names so the finally clause can run even when one of
                 # the lock acquisitions below raises.
+                wlock = lock = None
+                try:
+                    wlock = repo.wlock()
+                    lock = repo.lock()
                     # The return value of _docensor() is passed through unchanged.
+                    return _docensor(ui, repo, path, rev, tombstone, **opts)
+                finally:
                     # Released in the reverse of acquisition order; either name may
                     # still be None here if its acquisition did not complete.
+                    lockmod.release(lock, wlock)
+            def _docensor(ui, repo, path, rev='', tombstone='', **opts):
                 """Replace the stored content of one file revision with a tombstone.

                 After validating the arguments, the file's revlog index (and its
                 separate data file, when the revlog is not inline) is rewritten
                 through atomictemp writers:

                 * everything before the censored entry is copied unchanged;
                 * the censored entry is rewritten as the packed tombstone, padded
                   with spaces to the original raw size, with REVIDX_ISCENSORED set;
                 * every later entry that has the censored revision as a parent is
                   rewritten from its full text;
                 * all other later entries are copied with a corrected data offset.

                 Arguments:
                     ui: not used by this function.
                     repo: repository object providing file(), heads(), svfs, etc.
                     path: the single explicit file name to censor.
                     rev: revision identifier of the changeset to censor the file in.
                     tombstone: replacement text stored under the "censored" key.
                     **opts: accepted for call compatibility; not used here.

                 Raises:
                     error.Abort: if path or rev is missing, path is a pattern or
                         matches other than one file, the file has no history, the
                         revision or file cannot be resolved, the file node is still
                         present in a head or the changeset is a working directory
                         parent, the revlog version is not REVLOGNG, or the packed
                         tombstone is longer than the censored data.
                 """
                 if not path:
                     raise error.Abort(_('must specify file path to censor'))
                 if not rev:
                     raise error.Abort(_('must specify revision to censor'))
                 # Resolve the argument to exactly one explicit file name, using a
                 # matcher built against the working context.
                 wctx = repo[None]
                 m = scmutil.match(wctx, (path,))
                 if m.anypats() or len(m.files()) != 1:
                     raise error.Abort(_('can only specify an explicit filename'))
                 path = m.files()[0]
                 flog = repo.file(path)
                 if not len(flog):
                     raise error.Abort(_('cannot censor file with no history'))
                 # From here on rev is the integer revision number of the changeset.
                 rev = scmutil.revsingle(repo, rev, rev).rev()
                 try:
                     ctx = repo[rev]
                 except KeyError:
                     raise error.Abort(_('invalid revision identifier %s') % rev)
                 try:
                     fctx = ctx.filectx(path)
                 except error.LookupError:
                     raise error.Abort(_('file does not exist at revision %s') % rev)
                 # Refuse when any repository head still contains this exact file node.
                 fnode = fctx.filenode()
                 headctxs = [repo[c] for c in repo.heads()]
                 heads = [c for c in headctxs if path in c and c.filenode(path) == fnode]
                 if heads:
                     headlist = ', '.join([short(c.node()) for c in heads])
                     raise error.Abort(_('cannot censor file in heads (%s)') % headlist,
                         hint=_('clean/delete and commit first'))
                 # Refuse when the target changeset is a parent of the working directory.
                 wp = wctx.parents()
                 if ctx.node() in [p.node() for p in wp]:
                     raise error.Abort(_('cannot censor working directory'),
                         hint=_('clean/delete/update first'))
                 # Only the low 16 bits of flog.version are compared with REVLOGNG.
                 flogv = flog.version & 0xFFFF
                 if flogv != revlog.REVLOGNG:
                     raise error.Abort(
                         _('censor does not support revlog version %d') % (flogv,))
                 # Pack the user-supplied text under the "censored" metadata key; the
                 # second argument to packmeta is an empty string.
                 tombstone = filelog.packmeta({"censored": tombstone}, "")
                 # crev is the revision number inside the filelog, not the changelog.
                 crev = fctx.filerev()
                 # The tombstone is later padded up to the original raw size, so it
                 # must not already exceed that size.
                 if len(tombstone) > flog.rawsize(crev):
                     raise error.Abort(_(
                         'censor tombstone must be no longer than censored data'))
                 # Using two files instead of one makes it easy to rewrite entry-by-entry
                 # NOTE(review): the four handles opened below are closed only by the
                 # close() calls at the end of this function; an exception raised in
                 # between skips them. Confirm the atomictemp writers are discarded
                 # rather than committed in that case.
                 idxread = repo.svfs(flog.indexfile, 'r')
                 idxwrite = repo.svfs(flog.indexfile, 'wb', atomictemp=True)
                 if flog.version & revlog.REVLOGNGINLINEDATA:
                     # Inline revlog: the data handles alias the index handles.
                     dataread, datawrite = idxread, idxwrite
                 else:
                     dataread = repo.svfs(flog.datafile, 'r')
                     datawrite = repo.svfs(flog.datafile, 'wb', atomictemp=True)
                 # Copy all revlog data up to the entry to be censored.
                 rio = revlog.revlogio()
                 # offset tracks the data offset at which the next entry is written.
                 offset = flog.start(crev)
                 # crev * rio.size bytes of index, then offset bytes of data.
                 for chunk in util.filechunkiter(idxread, limit=crev * rio.size):
                     idxwrite.write(chunk)
                 for chunk in util.filechunkiter(dataread, limit=offset):
                     datawrite.write(chunk)
                 def rewriteindex(r, newoffs, newdata=None):
                     """Rewrite the index entry with a new data offset and optional new data.
                     The newdata argument, if given, is a tuple of three positive integers:
                     (new compressed, new uncompressed, added flag bits).
                     """
                     offlags, comp, uncomp, base, link, p1, p2, nodeid = flog.index[r]
                     flags = revlog.gettype(offlags)
                     if newdata:
                         # Replace both lengths and OR the extra bits into the flags.
                         comp, uncomp, nflags = newdata
                         flags |= nflags
                     offlags = revlog.offset_type(newoffs, flags)
                     # NOTE(review): the fourth field is written as r, not the `base`
                     # value unpacked above. That looks intended for entries rewritten
                     # as full text; confirm it is also correct for entries whose data
                     # is copied verbatim by the caller.
                     e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
                     idxwrite.write(rio.packentry(e, None, flog.version, r))
                     # Advance the read handle by one index entry, relative to its
                     # current position (whence=1), to stay in step with the writer.
                     idxread.seek(rio.size, 1)
                 def rewrite(r, offs, data, nflags=revlog.REVIDX_DEFAULT_FLAGS):
                     """Write the given full text to the filelog with the given data offset.
                     Returns:
                         The integer number of data bytes written, for tracking data offsets.
                     """
                     flag, compdata = flog.compress(data)
                     # Stored length is the compression header plus the payload.
                     newcomp = len(flag) + len(compdata)
                     rewriteindex(r, offs, (newcomp, len(data), nflags))
                     datawrite.write(flag)
                     datawrite.write(compdata)
                     # Skip the original stored data for r in the read handle.
                     dataread.seek(flog.length(r), 1)
                     return newcomp
                 # Rewrite censored revlog entry with (padded) tombstone data.
                 pad = ' ' * (flog.rawsize(crev) - len(tombstone))
                 offset += rewrite(crev, offset, tombstone + pad, revlog.REVIDX_ISCENSORED)
                 # Rewrite all following filelog revisions fixing up offsets and deltas.
                 for srev in xrange(crev + 1, len(flog)):
                     if crev in flog.parentrevs(srev):
                         # Immediate children of censored node must be re-added as fulltext.
                         try:
                             revdata = flog.revision(srev)
                         except error.CensoredNodeError as e:
                             # The child is itself censored: keep its tombstone.
                             revdata = e.tombstone
                         dlen = rewrite(srev, offset, revdata)
                     else:
                         # Copy any other revision data verbatim after fixing up the offset.
                         rewriteindex(srev, offset)
                         dlen = flog.length(srev)
                         for chunk in util.filechunkiter(dataread, limit=dlen):
                             datawrite.write(chunk)
                     offset += dlen
                 idxread.close()
                 idxwrite.close()
                 # In the inline case the data handles are the index handles, which
                 # were already closed above.
                 if dataread is not idxread:
                     dataread.close()
                     datawrite.close()