##// END OF EJS Templates
contrib/synthrepo: walk a repo's directory structure during analysis...
Mike Edgar
r22709:889789a2 default
parent child Browse files
Show More
@@ -23,6 +23,7 b' Properties that are analyzed and synthes'
23 - Probability of a commit being a merge
23 - Probability of a commit being a merge
24 - Probability of a newly added file being added to a new directory
24 - Probability of a newly added file being added to a new directory
25 - Interarrival time, and time zone, of commits
25 - Interarrival time, and time zone, of commits
26 - Number of files in each directory
26
27
27 A few obvious properties that are not currently handled realistically:
28 A few obvious properties that are not currently handled realistically:
28
29
@@ -81,21 +82,25 b' def parsegitdiff(lines):'
81 yield filename, mar, lineadd, lineremove, binary
82 yield filename, mar, lineadd, lineremove, binary
82
83
83 @command('analyze',
84 @command('analyze',
84 [('o', 'output', [], _('write output to given file'), _('FILE')),
85 [('o', 'output', '', _('write output to given file'), _('FILE')),
85 ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
86 ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
86 _('hg analyze'))
87 _('hg analyze'), optionalrepo=True)
87 def analyze(ui, repo, *revs, **opts):
88 def analyze(ui, repo, *revs, **opts):
88 '''create a simple model of a repository to use for later synthesis
89 '''create a simple model of a repository to use for later synthesis
89
90
90 This command examines every changeset in the given range (or all
91 This command examines every changeset in the given range (or all
91 of history if none are specified) and creates a simple statistical
92 of history if none are specified) and creates a simple statistical
92 model of the history of the repository.
93 model of the history of the repository. It also measures the directory
94 structure of the repository as checked out.
93
95
94 The model is written out to a JSON file, and can be used by
96 The model is written out to a JSON file, and can be used by
95 :hg:`synthesize` to create or augment a repository with synthetic
97 :hg:`synthesize` to create or augment a repository with synthetic
96 commits that have a structure that is statistically similar to the
98 commits that have a structure that is statistically similar to the
97 analyzed repository.
99 analyzed repository.
98 '''
100 '''
101 root = repo.root
102 if not root.endswith(os.path.sep):
103 root += os.path.sep
99
104
100 revs = list(revs)
105 revs = list(revs)
101 revs.extend(opts['rev'])
106 revs.extend(opts['rev'])
@@ -104,15 +109,24 b' def analyze(ui, repo, *revs, **opts):'
104
109
105 output = opts['output']
110 output = opts['output']
106 if not output:
111 if not output:
107 output = os.path.basename(repo.root) + '.json'
112 output = os.path.basename(root) + '.json'
108
113
109 if output == '-':
114 if output == '-':
110 fp = sys.stdout
115 fp = sys.stdout
111 else:
116 else:
112 fp = open(output, 'w')
117 fp = open(output, 'w')
113
118
114 revs = scmutil.revrange(repo, revs)
119 # Always obtain file counts of each directory in the given root directory.
115 revs.sort()
120 def onerror(e):
121 ui.warn(_('error walking directory structure: %s\n') % e)
122
123 dirs = {}
124 rootprefixlen = len(root)
125 for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
126 dirpathfromroot = dirpath[rootprefixlen:]
127 dirs[dirpathfromroot] = len(filenames)
128 if '.hg' in dirnames:
129 dirnames.remove('.hg')
116
130
117 lineschanged = zerodict()
131 lineschanged = zerodict()
118 children = zerodict()
132 children = zerodict()
@@ -128,54 +142,61 b' def analyze(ui, repo, *revs, **opts):'
128 dirsadded = zerodict()
142 dirsadded = zerodict()
129 tzoffset = zerodict()
143 tzoffset = zerodict()
130
144
131 progress = ui.progress
145 # If a mercurial repo is available, also model the commit history.
132 _analyzing = _('analyzing')
146 if repo:
133 _changesets = _('changesets')
147 revs = scmutil.revrange(repo, revs)
134 _total = len(revs)
148 revs.sort()
149
150 progress = ui.progress
151 _analyzing = _('analyzing')
152 _changesets = _('changesets')
153 _total = len(revs)
135
154
136 for i, rev in enumerate(revs):
155 for i, rev in enumerate(revs):
137 progress(_analyzing, i, unit=_changesets, total=_total)
156 progress(_analyzing, i, unit=_changesets, total=_total)
138 ctx = repo[rev]
157 ctx = repo[rev]
139 pl = ctx.parents()
158 pl = ctx.parents()
140 pctx = pl[0]
159 pctx = pl[0]
141 prev = pctx.rev()
160 prev = pctx.rev()
142 children[prev] += 1
161 children[prev] += 1
143 p1distance[rev - prev] += 1
162 p1distance[rev - prev] += 1
144 parents[len(pl)] += 1
163 parents[len(pl)] += 1
145 tzoffset[ctx.date()[1]] += 1
164 tzoffset[ctx.date()[1]] += 1
146 if len(pl) > 1:
165 if len(pl) > 1:
147 p2distance[rev - pl[1].rev()] += 1
166 p2distance[rev - pl[1].rev()] += 1
148 if prev == rev - 1:
167 if prev == rev - 1:
149 lastctx = pctx
168 lastctx = pctx
150 else:
169 else:
151 lastctx = repo[rev - 1]
170 lastctx = repo[rev - 1]
152 if lastctx.rev() != nullrev:
171 if lastctx.rev() != nullrev:
153 interarrival[roundto(ctx.date()[0] - lastctx.date()[0], 300)] += 1
172 timedelta = ctx.date()[0] - lastctx.date()[0]
154 diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
173 interarrival[roundto(timedelta, 300)] += 1
155 fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
174 diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
156 for filename, mar, lineadd, lineremove, binary in parsegitdiff(diff):
175 fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
157 if binary:
176 for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
158 continue
177 if isbin:
159 added = sum(lineadd.itervalues(), 0)
178 continue
160 if mar == 'm':
179 added = sum(lineadd.itervalues(), 0)
161 if added and lineremove:
180 if mar == 'm':
162 lineschanged[roundto(added, 5), roundto(lineremove, 5)] += 1
181 if added and lineremove:
163 filechanges += 1
182 lineschanged[roundto(added, 5),
164 elif mar == 'a':
183 roundto(lineremove, 5)] += 1
165 fileadds += 1
184 filechanges += 1
166 if '/' in filename:
185 elif mar == 'a':
167 filedir = filename.rsplit('/', 1)[0]
186 fileadds += 1
168 if filedir not in pctx.dirs():
187 if '/' in filename:
169 diradds += 1
188 filedir = filename.rsplit('/', 1)[0]
170 linesinfilesadded[roundto(added, 5)] += 1
189 if filedir not in pctx.dirs():
171 elif mar == 'r':
190 diradds += 1
172 fileremoves += 1
191 linesinfilesadded[roundto(added, 5)] += 1
173 for length, count in lineadd.iteritems():
192 elif mar == 'r':
174 linelengths[length] += count
193 fileremoves += 1
175 fileschanged[filechanges] += 1
194 for length, count in lineadd.iteritems():
176 filesadded[fileadds] += 1
195 linelengths[length] += count
177 dirsadded[diradds] += 1
196 fileschanged[filechanges] += 1
178 filesremoved[fileremoves] += 1
197 filesadded[fileadds] += 1
198 dirsadded[diradds] += 1
199 filesremoved[fileremoves] += 1
179
200
180 invchildren = zerodict()
201 invchildren = zerodict()
181
202
@@ -189,6 +210,7 b' def analyze(ui, repo, *revs, **opts):'
189 return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
210 return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
190
211
191 json.dump({'revs': len(revs),
212 json.dump({'revs': len(revs),
213 'initdirs': pronk(dirs),
192 'lineschanged': pronk(lineschanged),
214 'lineschanged': pronk(lineschanged),
193 'children': pronk(invchildren),
215 'children': pronk(invchildren),
194 'fileschanged': pronk(fileschanged),
216 'fileschanged': pronk(fileschanged),
General Comments: 0
You need to be logged in to leave comments. Log in now.