upstream/mercurial-mirror Commit - r22709:889789a2

23

- Probability of a commit being a merge

23

- Probability of a commit being a merge

24

- Probability of a newly added file being added to a new directory

24

- Probability of a newly added file being added to a new directory

25

- Interarrival time, and time zone, of commits

25

- Interarrival time, and time zone, of commits

26

- Number of files in each directory

26

27

A few obvious properties that are not currently handled realistically:

28

A few obvious properties that are not currently handled realistically:

28

29

81

yield filename, mar, lineadd, lineremove, binary

82

yield filename, mar, lineadd, lineremove, binary

82

83

@command('analyze',

84

@command('analyze',

84

[('o', 'output', [], _('write output to given file'), _('FILE')),

85

[('o', 'output', '', _('write output to given file'), _('FILE')),

85

('r', 'rev', [], _('analyze specified revisions'), _('REV'))],

86

('r', 'rev', [], _('analyze specified revisions'), _('REV'))],

86

_('hg analyze'))

87

_('hg analyze'), optionalrepo=True)

87

def analyze(ui, repo, *revs, **opts):

88

def analyze(ui, repo, *revs, **opts):

88

'''create a simple model of a repository to use for later synthesis

89

'''create a simple model of a repository to use for later synthesis

89

90

This command examines every changeset in the given range (or all

91

This command examines every changeset in the given range (or all

91

of history if none are specified) and creates a simple statistical

92

of history if none are specified) and creates a simple statistical

92

model of the history of the repository.

93

model of the history of the repository. It also measures the directory

94

structure of the repository as checked out.

93

95

94

The model is written out to a JSON file, and can be used by

96

The model is written out to a JSON file, and can be used by

95

:hg:`synthesize` to create or augment a repository with synthetic

97

:hg:`synthesize` to create or augment a repository with synthetic

96

commits that have a structure that is statistically similar to the

98

commits that have a structure that is statistically similar to the

97

analyzed repository.

99

analyzed repository.

98

'''

100

'''

101

root = repo.root

102

if not root.endswith(os.path.sep):

103

root += os.path.sep

99

104

100

revs = list(revs)

105

revs = list(revs)

101

revs.extend(opts['rev'])

106

revs.extend(opts['rev'])

104

109

105

output = opts['output']

110

output = opts['output']

106

if not output:

111

if not output:

107

output = os.path.basename(repo.root) + '.json'

112

output = os.path.basename(root) + '.json'

108

113

109

if output == '-':

114

if output == '-':

110

fp = sys.stdout

115

fp = sys.stdout

111

else:

116

else:

112

fp = open(output, 'w')

117

fp = open(output, 'w')

113

118

114

revs = scmutil.revrange(repo, revs)

119

# Always obtain file counts of each directory in the given root directory.

115

revs.sort()

120

def onerror(e):

121

ui.warn(_('error walking directory structure: %s\n') % e)

122

123

dirs = {}

124

rootprefixlen = len(root)

125

for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):

126

dirpathfromroot = dirpath[rootprefixlen:]

127

dirs[dirpathfromroot] = len(filenames)

128

if '.hg' in dirnames:

129

dirnames.remove('.hg')

116

130

117

lineschanged = zerodict()

131

lineschanged = zerodict()

118

children = zerodict()

132

children = zerodict()

128

dirsadded = zerodict()

142

dirsadded = zerodict()

129

tzoffset = zerodict()

143

tzoffset = zerodict()

130

144

145

# If a mercurial repo is available, also model the commit history.

146

if repo:

147

revs = scmutil.revrange(repo, revs)

148

revs.sort()

149

131

progress = ui.progress

150

progress = ui.progress

132

_analyzing = _('analyzing')

151

_analyzing = _('analyzing')

133

_changesets = _('changesets')

152

_changesets = _('changesets')

150

else:

169

else:

151

lastctx = repo[rev - 1]

170

lastctx = repo[rev - 1]

152

if lastctx.rev() != nullrev:

171

if lastctx.rev() != nullrev:

153

interarrival[roundto(ctx.date()[0] - lastctx.date()[0], 300)] += 1

172

timedelta = ctx.date()[0] - lastctx.date()[0]

173

interarrival[roundto(timedelta, 300)] += 1

154

diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])

174

diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])

155

fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0

175

fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0

156

for filename, mar, lineadd, lineremove, binary in parsegitdiff(diff):

176

for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):

157

if binary:

177

if isbin:

158

continue

178

continue

159

added = sum(lineadd.itervalues(), 0)

179

added = sum(lineadd.itervalues(), 0)

160

if mar == 'm':

180

if mar == 'm':

161

if added and lineremove:

181

if added and lineremove:

162

lineschanged[roundto(added, 5), roundto(lineremove, 5)] += 1

182

lineschanged[roundto(added, 5),

183

roundto(lineremove, 5)] += 1

163

filechanges += 1

184

filechanges += 1

164

elif mar == 'a':

185

elif mar == 'a':

165

fileadds += 1

186

fileadds += 1

189

return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

210

return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

190

211

191

json.dump({'revs': len(revs),

212

json.dump({'revs': len(revs),

213

'initdirs': pronk(dirs),

192

'lineschanged': pronk(lineschanged),

214

'lineschanged': pronk(lineschanged),

193

'children': pronk(invchildren),

215

'children': pronk(invchildren),

194

'fileschanged': pronk(fileschanged),

216

'fileschanged': pronk(fileschanged),

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             - Probability of a commit being a merge
             - Probability of a newly added file being added to a new directory
             - Interarrival time, and time zone, of commits
+            - Number of files in each directory
             A few obvious properties that are not currently handled realistically:
                     yield filename, mar, lineadd, lineremove, binary
             @command('analyze',
-                     [('o', 'output', [], _('write output to given file'), _('FILE')),
+                     [('o', 'output', '', _('write output to given file'), _('FILE')),
                       ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
-                     _('hg analyze'))
+                     _('hg analyze'), optionalrepo=True)
             def analyze(ui, repo, *revs, **opts):
                 '''create a simple model of a repository to use for later synthesis
                 This command examines every changeset in the given range (or all
                 of history if none are specified) and creates a simple statistical
-                model of the history of the repository.
+                model of the history of the repository. It also measures the directory
+                structure of the repository as checked out.
                 The model is written out to a JSON file, and can be used by
                 :hg:`synthesize` to create or augment a repository with synthetic
                 commits that have a structure that is statistically similar to the
                 analyzed repository.
                 '''
+                root = repo.root
+                if not root.endswith(os.path.sep):
+                    root += os.path.sep
                 revs = list(revs)
                 revs.extend(opts['rev'])
                 output = opts['output']
                 if not output:
-                    output = os.path.basename(repo.root) + '.json'
+                    output = os.path.basename(root) + '.json'
                 if output == '-':
                     fp = sys.stdout
                 else:
                     fp = open(output, 'w')
-                revs = scmutil.revrange(repo, revs)
+                # Always obtain file counts of each directory in the given root directory.
-                revs.sort()
+                def onerror(e):
+                    ui.warn(_('error walking directory structure: %s\n') % e)
+                dirs = {}
+                rootprefixlen = len(root)
+                for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
+                    dirpathfromroot = dirpath[rootprefixlen:]
+                    dirs[dirpathfromroot] = len(filenames)
+                    if '.hg' in dirnames:
+                        dirnames.remove('.hg')
                 lineschanged = zerodict()
                 children = zerodict()
                 dirsadded = zerodict()
                 tzoffset = zerodict()
+                # If a mercurial repo is available, also model the commit history.
+                if repo:
+                    revs = scmutil.revrange(repo, revs)
+                    revs.sort()
                     progress = ui.progress
                     _analyzing = _('analyzing')
                     _changesets = _('changesets')
                         else:
                             lastctx = repo[rev - 1]
                         if lastctx.rev() != nullrev:
-                        interarrival[roundto(ctx.date()[0] - lastctx.date()[0], 300)] += 1
+                            timedelta = ctx.date()[0] - lastctx.date()[0]
+                            interarrival[roundto(timedelta, 300)] += 1
                         diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
                         fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
-                    for filename, mar, lineadd, lineremove, binary in parsegitdiff(diff):
+                        for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
-                        if binary:
+                            if isbin:
                                 continue
                             added = sum(lineadd.itervalues(), 0)
                             if mar == 'm':
                                 if added and lineremove:
-                                lineschanged[roundto(added, 5), roundto(lineremove, 5)] += 1
+                                    lineschanged[roundto(added, 5),
+                                                 roundto(lineremove, 5)] += 1
                                     filechanges += 1
                             elif mar == 'a':
                                 fileadds += 1
                     return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
                 json.dump({'revs': len(revs),
+                           'initdirs': pronk(dirs),
                            'lineschanged': pronk(lineschanged),
                            'children': pronk(invchildren),
                            'fileschanged': pronk(fileschanged),