upstream/mercurial-mirror Commit - r10009:69dca857

1

#!/usr/bin/env python

1

#!/usr/bin/env python

2

3

"""\

3

"""\

4

Reorder a revlog (by default the manifest file in the current

4

Reorder a revlog (by default the manifest file in the current

5

repository) to save space. Specifically, this topologically sorts the

5

repository) to save space. Specifically, this topologically sorts the

6

revisions in the revlog so that revisions on the same branch are adjacent

6

revisions in the revlog so that revisions on the same branch are adjacent

7

as much as possible. This is a workaround for the fact that Mercurial

7

as much as possible. This is a workaround for the fact that Mercurial

8

computes deltas relative to the previous revision rather than relative to a

8

computes deltas relative to the previous revision rather than relative to a

9

parent revision. This is *not* safe to run on a changelog.

9

parent revision. This is *not* safe to run on a changelog.

10

"""

10

"""

11

12

# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>

12

# Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>

13

# as a patch to rewrite-log. Cleaned up, refactored, documented, and

13

# as a patch to rewrite-log. Cleaned up, refactored, documented, and

14

# renamed by Greg Ward <greg at gerg.ca>.

14

# renamed by Greg Ward <greg at gerg.ca>.

15

16

# XXX would be nice to have a way to verify the repository after shrinking,

16

# XXX would be nice to have a way to verify the repository after shrinking,

17

# e.g. by comparing "before" and "after" states of random changesets

17

# e.g. by comparing "before" and "after" states of random changesets

18

# (maybe: export before, shrink, export after, diff).

18

# (maybe: export before, shrink, export after, diff).

19

20

import sys, os, tempfile

20

import sys, os, tempfile

21

import optparse

21

import optparse

22

from mercurial import ui as ui_, hg, revlog, transaction, node, util

22

from mercurial import ui as ui_, hg, revlog, transaction, node, util

23

from mercurial import changegroup

23

24

def toposort(rl):

25

def toposort(rl):

25

write = sys.stdout.write

26

write = sys.stdout.write

26

27

children = {}

28

children = {}

28

root = []

29

root = []

29

# build children and roots

30

# build children and roots

30

write('reading %d revs ' % len(rl))

31

write('reading %d revs ' % len(rl))

31

try:

32

try:

32

for i in rl:

33

for i in rl:

33

children[i] = []

34

children[i] = []

34

parents = [p for p in rl.parentrevs(i) if p != node.nullrev]

35

parents = [p for p in rl.parentrevs(i) if p != node.nullrev]

35

# in case of duplicate parents

36

# in case of duplicate parents

36

if len(parents) == 2 and parents[0] == parents[1]:

37

if len(parents) == 2 and parents[0] == parents[1]:

37

del parents[1]

38

del parents[1]

38

for p in parents:

39

for p in parents:

39

assert p in children

40

assert p in children

40

children[p].append(i)

41

children[p].append(i)

41

42

if len(parents) == 0:

43

if len(parents) == 0:

43

root.append(i)

44

root.append(i)

44

45

if i % 1000 == 0:

46

if i % 1000 == 0:

46

write('.')

47

write('.')

47

finally:

48

finally:

48

write('\n')

49

write('\n')

49

50

# XXX this is a reimplementation of the 'branchsort' topo sort

51

# XXX this is a reimplementation of the 'branchsort' topo sort

51

# algorithm in hgext.convert.convcmd... would be nice not to duplicate

52

# algorithm in hgext.convert.convcmd... would be nice not to duplicate

52

# the algorithm

53

# the algorithm

53

write('sorting ...')

54

write('sorting ...')

54

visit = root

55

visit = root

55

ret = []

56

ret = []

56

while visit:

57

while visit:

57

i = visit.pop(0)

58

i = visit.pop(0)

58

ret.append(i)

59

ret.append(i)

59

if i not in children:

60

if i not in children:

60

# This only happens if some node's p1 == p2, which can

61

# This only happens if some node's p1 == p2, which can

61

# happen in the manifest in certain circumstances.

62

# happen in the manifest in certain circumstances.

62

continue

63

continue

63

next = []

64

next = []

64

for c in children.pop(i):

65

for c in children.pop(i):

65

parents_unseen = [p for p in rl.parentrevs(c)

66

parents_unseen = [p for p in rl.parentrevs(c)

66

if p != node.nullrev and p in children]

67

if p != node.nullrev and p in children]

67

if len(parents_unseen) == 0:

68

if len(parents_unseen) == 0:

68

next.append(c)

69

next.append(c)

69

visit = next + visit

70

visit = next + visit

70

write('\n')

71

write('\n')

71

return ret

72

return ret

72

73

def writerevs(r1, r2, order, tr):

74

def writerevs(r1, r2, order, tr):

74

write = sys.stdout.write

75

write = sys.stdout.write

75

write('writing %d revs ' % len(order))

76

write('writing %d revs ' % len(order))

77

78

count = [0]

79

def progress(*args):

80

if count[0] % 1000 == 0:

81

write('.')

82

count[0] += 1

83

84

order = [r1.node(r) for r in order]

85

86

# this is a bit ugly, but it works

87

lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))

88

unlookup = lambda x: int(x, 10)

89

76

try:

90

try:

77

count = 0

91

group = util.chunkbuffer(r1.group(order, lookup, progress))

78

for rev in order:

92

chunkiter = changegroup.chunkiter(group)

79

n = r1.node(rev)

93

r2.addgroup(chunkiter, unlookup, tr)

80

p1, p2 = r1.parents(n)

81

l = r1.linkrev(rev)

82

t = r1.revision(n)

83

n2 = r2.addrevision(t, tr, l, p1, p2)

84

85

if count % 1000 == 0:

86

write('.')

87

count += 1

88

finally:

94

finally:

89

write('\n')

95

write('\n')

90

96

91

def report(olddatafn, newdatafn):

97

def report(olddatafn, newdatafn):

92

oldsize = float(os.stat(olddatafn).st_size)

98

oldsize = float(os.stat(olddatafn).st_size)

93

newsize = float(os.stat(newdatafn).st_size)

99

newsize = float(os.stat(newdatafn).st_size)

94

100

95

# argh: have to pass an int to %d, because a float >= 2^32

101

# argh: have to pass an int to %d, because a float >= 2^32

96

# blows up under Python 2.5 or earlier

102

# blows up under Python 2.5 or earlier

97

sys.stdout.write('old file size: %12d bytes (%6.1f MiB)\n'

103

sys.stdout.write('old file size: %12d bytes (%6.1f MiB)\n'

98

% (int(oldsize), oldsize/1024/1024))

104

% (int(oldsize), oldsize/1024/1024))

99

sys.stdout.write('new file size: %12d bytes (%6.1f MiB)\n'

105

sys.stdout.write('new file size: %12d bytes (%6.1f MiB)\n'

100

% (int(newsize), newsize/1024/1024))

106

% (int(newsize), newsize/1024/1024))

101

107

102

shrink_percent = (oldsize - newsize) / oldsize * 100

108

shrink_percent = (oldsize - newsize) / oldsize * 100

103

shrink_factor = oldsize / newsize

109

shrink_factor = oldsize / newsize

104

sys.stdout.write('shrinkage: %.1f%% (%.1fx)\n'

110

sys.stdout.write('shrinkage: %.1f%% (%.1fx)\n'

105

% (shrink_percent, shrink_factor))

111

% (shrink_percent, shrink_factor))

106

112

107

def main():

113

def main():

108

114

109

# Unbuffer stdout for nice progress output.

115

# Unbuffer stdout for nice progress output.

110

sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

116

sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

111

write = sys.stdout.write

117

write = sys.stdout.write

112

118

113

parser = optparse.OptionParser(description=__doc__)

119

parser = optparse.OptionParser(description=__doc__)

114

parser.add_option('-R', '--repository',

120

parser.add_option('-R', '--repository',

115

default=os.path.curdir,

121

default=os.path.curdir,

116

metavar='REPO',

122

metavar='REPO',

117

help='repository root directory [default: current dir]')

123

help='repository root directory [default: current dir]')

118

parser.add_option('--revlog',

124

parser.add_option('--revlog',

119

metavar='FILE',

125

metavar='FILE',

120

help='shrink FILE [default: REPO/hg/store/00manifest.i]')

126

help='shrink FILE [default: REPO/hg/store/00manifest.i]')

121

(options, args) = parser.parse_args()

127

(options, args) = parser.parse_args()

122

if args:

128

if args:

123

parser.error('too many arguments')

129

parser.error('too many arguments')

124

130

125

# Open the specified repository.

131

# Open the specified repository.

126

ui = ui_.ui()

132

ui = ui_.ui()

127

repo = hg.repository(ui, options.repository)

133

repo = hg.repository(ui, options.repository)

128

if not repo.local():

134

if not repo.local():

129

parser.error('not a local repository: %s' % options.repository)

135

parser.error('not a local repository: %s' % options.repository)

130

136

131

if options.revlog is None:

137

if options.revlog is None:

132

indexfn = repo.sjoin('00manifest.i')

138

indexfn = repo.sjoin('00manifest.i')

133

else:

139

else:

134

if not options.revlog.endswith('.i'):

140

if not options.revlog.endswith('.i'):

135

parser.error('--revlog option must specify the revlog index file '

141

parser.error('--revlog option must specify the revlog index file '

136

'(*.i), not %s' % options.revlog)

142

'(*.i), not %s' % options.revlog)

137

143

138

indexfn = os.path.realpath(options.revlog)

144

indexfn = os.path.realpath(options.revlog)

139

store = repo.sjoin('')

145

store = repo.sjoin('')

140

if not indexfn.startswith(store):

146

if not indexfn.startswith(store):

141

parser.error('--revlog option must specify a revlog in %s, not %s'

147

parser.error('--revlog option must specify a revlog in %s, not %s'

142

% (store, indexfn))

148

% (store, indexfn))

143

149

144

datafn = indexfn[:-2] + '.d'

150

datafn = indexfn[:-2] + '.d'

145

if not os.path.exists(indexfn):

151

if not os.path.exists(indexfn):

146

parser.error('no such file: %s' % indexfn)

152

parser.error('no such file: %s' % indexfn)

147

if '00changelog' in indexfn:

153

if '00changelog' in indexfn:

148

parser.error('shrinking the changelog will corrupt your repository')

154

parser.error('shrinking the changelog will corrupt your repository')

149

if not os.path.exists(datafn):

155

if not os.path.exists(datafn):

150

# This is just a lazy shortcut because I can't be bothered to

156

# This is just a lazy shortcut because I can't be bothered to

151

# handle all the special cases that entail from no .d file.

157

# handle all the special cases that entail from no .d file.

152

parser.error('%s does not exist: revlog not big enough '

158

parser.error('%s does not exist: revlog not big enough '

153

'to be worth shrinking' % datafn)

159

'to be worth shrinking' % datafn)

154

160

155

oldindexfn = indexfn + '.old'

161

oldindexfn = indexfn + '.old'

156

olddatafn = datafn + '.old'

162

olddatafn = datafn + '.old'

157

if os.path.exists(oldindexfn) or os.path.exists(olddatafn):

163

if os.path.exists(oldindexfn) or os.path.exists(olddatafn):

158

parser.error('one or both of\n'

164

parser.error('one or both of\n'

159

' %s\n'

165

' %s\n'

160

' %s\n'

166

' %s\n'

161

'exists from a previous run; please clean up before '

167

'exists from a previous run; please clean up before '

162

'running again'

168

'running again'

163

% (oldindexfn, olddatafn))

169

% (oldindexfn, olddatafn))

164

170

165

write('shrinking %s\n' % indexfn)

171

write('shrinking %s\n' % indexfn)

166

prefix = os.path.basename(indexfn)[:-1]

172

prefix = os.path.basename(indexfn)[:-1]

167

(tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),

173

(tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),

168

prefix=prefix,

174

prefix=prefix,

169

suffix='.i')

175

suffix='.i')

170

tmpdatafn = tmpindexfn[:-2] + '.d'

176

tmpdatafn = tmpindexfn[:-2] + '.d'

171

os.close(tmpfd)

177

os.close(tmpfd)

172

178

173

r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)

179

r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)

174

r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)

180

r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)

175

181

176

# Don't use repo.transaction(), because then things get hairy with

182

# Don't use repo.transaction(), because then things get hairy with

177

# paths: some need to be relative to .hg, and some need to be

183

# paths: some need to be relative to .hg, and some need to be

178

# absolute. Doing it this way keeps things simple: everything is an

184

# absolute. Doing it this way keeps things simple: everything is an

179

# absolute path.

185

# absolute path.

180

lock = repo.lock(wait=False)

186

lock = repo.lock(wait=False)

181

tr = transaction.transaction(sys.stderr.write,

187

tr = transaction.transaction(sys.stderr.write,

182

open,

188

open,

183

repo.sjoin('journal'))

189

repo.sjoin('journal'))

184

190

185

try:

191

try:

186

try:

192

try:

187

order = toposort(r1)

193

order = toposort(r1)

188

writerevs(r1, r2, order, tr)

194

writerevs(r1, r2, order, tr)

189

report(datafn, tmpdatafn)

195

report(datafn, tmpdatafn)

190

tr.close()

196

tr.close()

191

except:

197

except:

192

# Abort transaction first, so we truncate the files before

198

# Abort transaction first, so we truncate the files before

193

# deleting them.

199

# deleting them.

194

tr.abort()

200

tr.abort()

195

if os.path.exists(tmpindexfn):

201

if os.path.exists(tmpindexfn):

196

os.unlink(tmpindexfn)

202

os.unlink(tmpindexfn)

197

if os.path.exists(tmpdatafn):

203

if os.path.exists(tmpdatafn):

198

os.unlink(tmpdatafn)

204

os.unlink(tmpdatafn)

199

raise

205

raise

200

finally:

206

finally:

201

lock.release()

207

lock.release()

202

208

203

os.link(indexfn, oldindexfn)

209

os.link(indexfn, oldindexfn)

204

os.link(datafn, olddatafn)

210

os.link(datafn, olddatafn)

205

os.rename(tmpindexfn, indexfn)

211

os.rename(tmpindexfn, indexfn)

206

os.rename(tmpdatafn, datafn)

212

os.rename(tmpdatafn, datafn)

207

write('note: old revlog saved in:\n'

213

write('note: old revlog saved in:\n'

208

' %s\n'

214

' %s\n'

209

' %s\n'

215

' %s\n'

210

'(You can delete those files when you are satisfied that your\n'

216

'(You can delete those files when you are satisfied that your\n'

211

'repository is still sane. '

217

'repository is still sane. '

212

'Running \'hg verify\' is strongly recommended.)\n'

218

'Running \'hg verify\' is strongly recommended.)\n'

213

% (oldindexfn, olddatafn))

219

% (oldindexfn, olddatafn))

214

220

215

try:

221

try:

216

main()

222

main()

217

except KeyboardInterrupt:

223

except KeyboardInterrupt:

218

sys.exit("interrupted")

224

sys.exit("interrupted")

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python
             """\
             Reorder a revlog (by default the manifest file in the current
             repository) to save space.  Specifically, this topologically sorts the
             revisions in the revlog so that revisions on the same branch are adjacent
             as much as possible.  This is a workaround for the fact that Mercurial
             computes deltas relative to the previous revision rather than relative to a
             parent revision.  This is *not* safe to run on a changelog.
             """
             # Originally written by Benoit Boissinot <benoit.boissinot at ens-lyon.org>
             # as a patch to rewrite-log.  Cleaned up, refactored, documented, and
             # renamed by Greg Ward <greg at gerg.ca>.
             # XXX would be nice to have a way to verify the repository after shrinking,
             # e.g. by comparing "before" and "after" states of random changesets
             # (maybe: export before, shrink, export after, diff).
             import sys, os, tempfile
             import optparse
             from mercurial import ui as ui_, hg, revlog, transaction, node, util
+            from mercurial import changegroup
             def toposort(rl):
                 """Return the revision numbers of `rl` in topological order.

                 `rl` must support len(), iteration over its revision numbers and
                 parentrevs(rev).  The result lists every parent before its
                 children, and a child is emitted directly after the parent that
                 completes its set of already-emitted parents, so linear runs of
                 history stay adjacent.

                 Progress is written to sys.stdout: one '.' per 1000 revisions
                 while reading, then a 'sorting ...' line.
                 """
                 write = sys.stdout.write

                 children = {}
                 root = []
                 # build children and roots
                 write('reading %d revs ' % len(rl))
                 try:
                     for i in rl:
                         children[i] = []
                         parents = [p for p in rl.parentrevs(i) if p != node.nullrev]
                         # in case of duplicate parents
                         if len(parents) == 2 and parents[0] == parents[1]:
                             del parents[1]
                         for p in parents:
                             # Relies on parents being iterated before their
                             # children (presumably revs come in increasing order).
                             assert p in children
                             children[p].append(i)

                         if not parents:
                             root.append(i)

                         if i % 1000 == 0:
                             write('.')
                 finally:
                     write('\n')

                 # XXX this is a reimplementation of the 'branchsort' topo sort
                 # algorithm in hgext.convert.convcmd... would be nice not to duplicate
                 # the algorithm
                 write('sorting ...')
                 # The work list is a LIFO stack whose top is the *end* of the list.
                 # Seeding it with the roots reversed and pushing ready children
                 # reversed yields exactly the order of "pop from the front, prepend
                 # children", but with O(1) pops and pushes instead of O(n) ones.
                 visit = root[::-1]
                 ret = []
                 while visit:
                     i = visit.pop()
                     ret.append(i)
                     if i not in children:
                         # This only happens if some node's p1 == p2, which can
                         # happen in the manifest in certain circumstances.
                         continue
                     ready = []
                     for c in children.pop(i):
                         # A parent still present in `children` has not been
                         # emitted yet; c must wait until all of them are gone.
                         parents_unseen = [p for p in rl.parentrevs(c)
                                           if p != node.nullrev and p in children]
                         if not parents_unseen:
                             ready.append(c)
                     visit.extend(reversed(ready))
                 write('\n')
                 return ret
             def writerevs(r1, r2, order, tr):
                 write = sys.stdout.write
                 write('writing %d revs ' % len(order))
+                count = [0]
+                def progress(*args):
+                    if count[0] % 1000 == 0:
+                        write('.')
+                    count[0] += 1
+                order = [r1.node(r) for r in order]
+                # this is a bit ugly, but it works
+                lookup = lambda x: "%020d" % r1.linkrev(r1.rev(x))
+                unlookup = lambda x: int(x, 10)
                 try:
-                    count = 0
+                    group = util.chunkbuffer(r1.group(order, lookup, progress))
-                    for rev in order:
+                    chunkiter = changegroup.chunkiter(group)
-                        n = r1.node(rev)
+                    r2.addgroup(chunkiter, unlookup, tr)
-                        p1, p2 = r1.parents(n)
-                        l = r1.linkrev(rev)
-                        t = r1.revision(n)
-                        n2 = r2.addrevision(t, tr, l, p1, p2)
-                        if count % 1000 == 0:
-                            write('.')
-                        count += 1
                 finally:
                     write('\n')
             def report(olddatafn, newdatafn):
                 """Print the sizes of the old and new data files and the shrinkage.

                 olddatafn, newdatafn: paths of the data files to compare; both are
                 read with os.stat(), so a missing file raises OSError.

                 Three lines are written to sys.stdout: the old size, the new size
                 and the shrinkage as a percentage plus an "N.Nx" factor.  When the
                 new file is empty the factor is printed as "n/a", and when the old
                 file is empty the percentage is 0.0, instead of dividing by zero.
                 """
                 oldsize = float(os.stat(olddatafn).st_size)
                 newsize = float(os.stat(newdatafn).st_size)

                 # argh: have to pass an int to %d, because a float >= 2^32
                 # blows up under Python 2.5 or earlier
                 sys.stdout.write('old file size: %12d bytes (%6.1f MiB)\n'
                                  % (int(oldsize), oldsize/1024/1024))
                 sys.stdout.write('new file size: %12d bytes (%6.1f MiB)\n'
                                  % (int(newsize), newsize/1024/1024))

                 # Guard both divisions: an empty data file must not turn the
                 # final report into a ZeroDivisionError.
                 if oldsize > 0:
                     shrink_percent = (oldsize - newsize) / oldsize * 100
                 else:
                     shrink_percent = 0.0
                 if newsize > 0:
                     shrink_factor = '%.1fx' % (oldsize / newsize)
                 else:
                     shrink_factor = 'n/a'
                 sys.stdout.write('shrinkage: %.1f%% (%s)\n'
                                  % (shrink_percent, shrink_factor))
             def main():
                 """Command-line entry point: shrink one revlog of a local repository.

                 Parses -R/--repository and --revlog, validates the target files,
                 writes a topologically re-sorted copy of the revlog into temporary
                 files next to it, and finally swaps the copy into place, keeping
                 hard links to the originals as '<index>.old' and '<data>.old'.

                 Usage problems are reported through parser.error().  Any failure
                 while rewriting aborts the transaction, removes the temporary
                 files and re-raises.  The repository lock is held from before the
                 source revlog is read until after the files have been swapped.
                 """

                 # Unbuffer stdout for nice progress output.
                 sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
                 write = sys.stdout.write

                 parser = optparse.OptionParser(description=__doc__)
                 parser.add_option('-R', '--repository',
                                   default=os.path.curdir,
                                   metavar='REPO',
                                   help='repository root directory [default: current dir]')
                 parser.add_option('--revlog',
                                   metavar='FILE',
                                   help='shrink FILE [default: REPO/.hg/store/00manifest.i]')
                 (options, args) = parser.parse_args()
                 if args:
                     parser.error('too many arguments')

                 # Open the specified repository.
                 ui = ui_.ui()
                 repo = hg.repository(ui, options.repository)
                 if not repo.local():
                     parser.error('not a local repository: %s' % options.repository)

                 if options.revlog is None:
                     indexfn = repo.sjoin('00manifest.i')
                 else:
                     if not options.revlog.endswith('.i'):
                         parser.error('--revlog option must specify the revlog index file '
                                      '(*.i), not %s' % options.revlog)

                     indexfn = os.path.realpath(options.revlog)
                     store = repo.sjoin('')
                     if not indexfn.startswith(store):
                         parser.error('--revlog option must specify a revlog in %s, not %s'
                                      % (store, indexfn))

                 # The data file sits next to the index: same name, '.d' suffix.
                 datafn = indexfn[:-2] + '.d'
                 if not os.path.exists(indexfn):
                     parser.error('no such file: %s' % indexfn)
                 if '00changelog' in indexfn:
                     parser.error('shrinking the changelog will corrupt your repository')
                 if not os.path.exists(datafn):
                     # This is just a lazy shortcut because I can't be bothered to
                     # handle all the special cases that entail from no .d file.
                     parser.error('%s does not exist: revlog not big enough '
                                  'to be worth shrinking' % datafn)

                 oldindexfn = indexfn + '.old'
                 olddatafn = datafn + '.old'
                 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
                     parser.error('one or both of\n'
                                  '  %s\n'
                                  '  %s\n'
                                  'exists from a previous run; please clean up before '
                                  'running again'
                                  % (oldindexfn, olddatafn))

                 write('shrinking %s\n' % indexfn)

                 # Take the lock before creating temporary files or reading the
                 # source revlog, and keep it until the new files are in place, so
                 # the revlog cannot change between being read and being replaced.
                 lock = repo.lock(wait=False)
                 try:
                     prefix = os.path.basename(indexfn)[:-1]
                     (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
                                                            prefix=prefix,
                                                            suffix='.i')
                     tmpdatafn = tmpindexfn[:-2] + '.d'
                     os.close(tmpfd)

                     tr = None
                     try:
                         r1 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                                            indexfn)
                         r2 = revlog.revlog(util.opener(os.getcwd(), audit=False),
                                            tmpindexfn)

                         # Don't use repo.transaction(), because then things get
                         # hairy with paths: some need to be relative to .hg, and
                         # some need to be absolute.  Doing it this way keeps things
                         # simple: everything is an absolute path.
                         tr = transaction.transaction(sys.stderr.write,
                                                      open,
                                                      repo.sjoin('journal'))

                         order = toposort(r1)
                         writerevs(r1, r2, order, tr)
                         report(datafn, tmpdatafn)
                         tr.close()
                     except:
                         # Deliberately catches everything (including
                         # KeyboardInterrupt) and re-raises after cleaning up.
                         # Abort transaction first, so we truncate the files before
                         # deleting them.
                         if tr is not None:
                             tr.abort()
                         if os.path.exists(tmpindexfn):
                             os.unlink(tmpindexfn)
                         if os.path.exists(tmpdatafn):
                             os.unlink(tmpdatafn)
                         raise

                     # Keep the originals reachable as *.old, then move the new
                     # files into place.  The four steps are not atomic as a
                     # group, but they now run while the lock is still held.
                     os.link(indexfn, oldindexfn)
                     os.link(datafn, olddatafn)
                     os.rename(tmpindexfn, indexfn)
                     os.rename(tmpdatafn, datafn)
                 finally:
                     lock.release()

                 write('note: old revlog saved in:\n'
                       '  %s\n'
                       '  %s\n'
                       '(You can delete those files when you are satisfied that your\n'
                       'repository is still sane.  '
                       'Running \'hg verify\' is strongly recommended.)\n'
                       % (oldindexfn, olddatafn))
             # Script entry point: run main() and turn Ctrl-C into a short message
             # instead of a traceback.
             try:
                 main()
             except KeyboardInterrupt:
                 # sys.exit() with a string prints it to stderr and exits with
                 # status 1.
                 sys.exit("interrupted")