##// END OF EJS Templates
contrib/synthrepo: walk a repo's directory structure during analysis...
Mike Edgar -
r22709:889789a2 default
parent child Browse files
Show More
@@ -23,6 +23,7 Properties that are analyzed and synthesized:
23 - Probability of a commit being a merge
23 - Probability of a commit being a merge
24 - Probability of a newly added file being added to a new directory
24 - Probability of a newly added file being added to a new directory
25 - Interarrival time, and time zone, of commits
25 - Interarrival time, and time zone, of commits
26 - Number of files in each directory
26
27
27 A few obvious properties that are not currently handled realistically:
28 A few obvious properties that are not currently handled realistically:
28
29
@@ -81,21 +82,25 def parsegitdiff(lines):
81 yield filename, mar, lineadd, lineremove, binary
82 yield filename, mar, lineadd, lineremove, binary
82
83
83 @command('analyze',
84 @command('analyze',
84 [('o', 'output', [], _('write output to given file'), _('FILE')),
85 [('o', 'output', '', _('write output to given file'), _('FILE')),
85 ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
86 ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
86 _('hg analyze'))
87 _('hg analyze'), optionalrepo=True)
87 def analyze(ui, repo, *revs, **opts):
88 def analyze(ui, repo, *revs, **opts):
88 '''create a simple model of a repository to use for later synthesis
89 '''create a simple model of a repository to use for later synthesis
89
90
90 This command examines every changeset in the given range (or all
91 This command examines every changeset in the given range (or all
91 of history if none are specified) and creates a simple statistical
92 of history if none are specified) and creates a simple statistical
92 model of the history of the repository.
93 model of the history of the repository. It also measures the directory
94 structure of the repository as checked out.
93
95
94 The model is written out to a JSON file, and can be used by
96 The model is written out to a JSON file, and can be used by
95 :hg:`synthesize` to create or augment a repository with synthetic
97 :hg:`synthesize` to create or augment a repository with synthetic
96 commits that have a structure that is statistically similar to the
98 commits that have a structure that is statistically similar to the
97 analyzed repository.
99 analyzed repository.
98 '''
100 '''
101 root = repo.root
102 if not root.endswith(os.path.sep):
103 root += os.path.sep
99
104
100 revs = list(revs)
105 revs = list(revs)
101 revs.extend(opts['rev'])
106 revs.extend(opts['rev'])
@@ -104,15 +109,24 def analyze(ui, repo, *revs, **opts):
104
109
105 output = opts['output']
110 output = opts['output']
106 if not output:
111 if not output:
107 output = os.path.basename(repo.root) + '.json'
112 output = os.path.basename(root) + '.json'
108
113
109 if output == '-':
114 if output == '-':
110 fp = sys.stdout
115 fp = sys.stdout
111 else:
116 else:
112 fp = open(output, 'w')
117 fp = open(output, 'w')
113
118
114 revs = scmutil.revrange(repo, revs)
119 # Always obtain file counts of each directory in the given root directory.
115 revs.sort()
120 def onerror(e):
121 ui.warn(_('error walking directory structure: %s\n') % e)
122
123 dirs = {}
124 rootprefixlen = len(root)
125 for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
126 dirpathfromroot = dirpath[rootprefixlen:]
127 dirs[dirpathfromroot] = len(filenames)
128 if '.hg' in dirnames:
129 dirnames.remove('.hg')
116
130
117 lineschanged = zerodict()
131 lineschanged = zerodict()
118 children = zerodict()
132 children = zerodict()
@@ -128,6 +142,11 def analyze(ui, repo, *revs, **opts):
128 dirsadded = zerodict()
142 dirsadded = zerodict()
129 tzoffset = zerodict()
143 tzoffset = zerodict()
130
144
145 # If a mercurial repo is available, also model the commit history.
146 if repo:
147 revs = scmutil.revrange(repo, revs)
148 revs.sort()
149
131 progress = ui.progress
150 progress = ui.progress
132 _analyzing = _('analyzing')
151 _analyzing = _('analyzing')
133 _changesets = _('changesets')
152 _changesets = _('changesets')
@@ -150,16 +169,18 def analyze(ui, repo, *revs, **opts):
150 else:
169 else:
151 lastctx = repo[rev - 1]
170 lastctx = repo[rev - 1]
152 if lastctx.rev() != nullrev:
171 if lastctx.rev() != nullrev:
153 interarrival[roundto(ctx.date()[0] - lastctx.date()[0], 300)] += 1
172 timedelta = ctx.date()[0] - lastctx.date()[0]
173 interarrival[roundto(timedelta, 300)] += 1
154 diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
174 diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
155 fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
175 fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
156 for filename, mar, lineadd, lineremove, binary in parsegitdiff(diff):
176 for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
157 if binary:
177 if isbin:
158 continue
178 continue
159 added = sum(lineadd.itervalues(), 0)
179 added = sum(lineadd.itervalues(), 0)
160 if mar == 'm':
180 if mar == 'm':
161 if added and lineremove:
181 if added and lineremove:
162 lineschanged[roundto(added, 5), roundto(lineremove, 5)] += 1
182 lineschanged[roundto(added, 5),
183 roundto(lineremove, 5)] += 1
163 filechanges += 1
184 filechanges += 1
164 elif mar == 'a':
185 elif mar == 'a':
165 fileadds += 1
186 fileadds += 1
@@ -189,6 +210,7 def analyze(ui, repo, *revs, **opts):
189 return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
210 return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
190
211
191 json.dump({'revs': len(revs),
212 json.dump({'revs': len(revs),
213 'initdirs': pronk(dirs),
192 'lineschanged': pronk(lineschanged),
214 'lineschanged': pronk(lineschanged),
193 'children': pronk(invchildren),
215 'children': pronk(invchildren),
194 'fileschanged': pronk(fileschanged),
216 'fileschanged': pronk(fileschanged),
General Comments 0
You need to be logged in to leave comments. Login now