synthrepo: use progress helper...
Martin von Zweigbergk
r38427:6540333a default
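This change migrates synthrepo from repeated ui.progress() calls to the ui.makeprogress() helper object. A minimal sketch of the two styles, assuming a `ui` object, the `_` translation function, and an `items` sequence from surrounding context (illustrative only, not part of the patch):

    # Old style: module-level ui.progress() calls, cleared by passing
    # None as the position.
    for i, item in enumerate(items):
        ui.progress(_('analyzing'), i, unit=_('changesets'), total=len(items))
    ui.progress(_('analyzing'), None)   # clear the progress bar

    # New style: create one progress helper, update it, then complete it.
    progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
                               total=len(items))
    for i, item in enumerate(items):
        progress.update(i)
    progress.complete()                 # finish and clear the progress bar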
@@ -1,516 +1,514 @@
# synthrepo.py - repo synthesis
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''synthesize structurally interesting change history

This extension is useful for creating a repository with properties
that are statistically similar to an existing repository. During
analysis, a simple probability table is constructed from the history
of an existing repository. During synthesis, these properties are
reconstructed.

Properties that are analyzed and synthesized include the following:

- Lines added or removed when an existing file is modified
- Number and sizes of files added
- Number of files removed
- Line lengths
- Topological distance to parent changeset(s)
- Probability of a commit being a merge
- Probability of a newly added file being added to a new directory
- Interarrival time, and time zone, of commits
- Number of files in each directory

A few obvious properties that are not currently handled realistically:

- Merges are treated as regular commits with two parents, which is not
  realistic
- Modifications are not treated as operations on hunks of lines, but
  as insertions and deletions of randomly chosen single lines
- Committer ID (always random)
- Executability of files
- Symlinks and binary files are ignored
'''

from __future__ import absolute_import
import bisect
import collections
import itertools
import json
import os
import random
import sys
import time

from mercurial.i18n import _
from mercurial.node import (
    nullid,
    nullrev,
    short,
)
from mercurial import (
    context,
    error,
    hg,
    patch,
    registrar,
    scmutil,
)
from mercurial.utils import dateutil

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

cmdtable = {}
command = registrar.command(cmdtable)

newfile = {'new fi', 'rename', 'copy f', 'copy t'}

def zerodict():
    return collections.defaultdict(lambda: 0)

def roundto(x, k):
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))

def parsegitdiff(lines):
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        start = line[:6]
        if start == 'diff -':
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            s = start[0]
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                lineadd[roundto(len(line) - 1, 5)] += 1
    if filename:
        yield filename, mar, lineadd, lineremove, binary

@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
    root = repo.root
    if not root.endswith(os.path.sep):
        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        output = os.path.basename(root) + '.json'

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    # Always obtain file counts of each directory in the given root directory.
    def onerror(e):
        ui.warn(_('error walking directory structure: %s\n') % e)

    dirs = {}
    rootprefixlen = len(root)
    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
        dirpathfromroot = dirpath[rootprefixlen:]
        dirs[dirpathfromroot] = len(filenames)
        if '.hg' in dirnames:
            dirnames.remove('.hg')

    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

    # If a mercurial repo is available, also model the commit history.
    if repo:
        revs = scmutil.revrange(repo, revs)
        revs.sort()

-        progress = ui.progress
-        _analyzing = _('analyzing')
-        _changesets = _('changesets')
-        _total = len(revs)
-
+        progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
+                                   total=len(revs))
        for i, rev in enumerate(revs):
-            progress(_analyzing, i, unit=_changesets, total=_total)
+            progress.update(i)
            ctx = repo[rev]
            pl = ctx.parents()
            pctx = pl[0]
            prev = pctx.rev()
            children[prev] += 1
            p1distance[rev - prev] += 1
            parents[len(pl)] += 1
            tzoffset[ctx.date()[1]] += 1
            if len(pl) > 1:
                p2distance[rev - pl[1].rev()] += 1
            if prev == rev - 1:
                lastctx = pctx
            else:
                lastctx = repo[rev - 1]
            if lastctx.rev() != nullrev:
                timedelta = ctx.date()[0] - lastctx.date()[0]
                interarrival[roundto(timedelta, 300)] += 1
            diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                if isbin:
                    continue
                added = sum(lineadd.itervalues(), 0)
                if mar == 'm':
                    if added and lineremove:
                        lineschanged[roundto(added, 5),
                                     roundto(lineremove, 5)] += 1
                        filechanges += 1
                elif mar == 'a':
                    fileadds += 1
                    if '/' in filename:
                        filedir = filename.rsplit('/', 1)[0]
                        if filedir not in pctx.dirs():
                            diradds += 1
                    linesinfilesadded[roundto(added, 5)] += 1
                elif mar == 'r':
                    fileremoves += 1
                for length, count in lineadd.iteritems():
                    linelengths[length] += count
            fileschanged[filechanges] += 1
            filesadded[fileadds] += 1
            dirsadded[diradds] += 1
            filesremoved[fileremoves] += 1

    invchildren = zerodict()

    for rev, count in children.iteritems():
        invchildren[count] += 1

    if output != '-':
        ui.status(_('writing output to %s\n') % output)

    def pronk(d):
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    json.dump({'revs': len(revs),
               'initdirs': pronk(dirs),
               'lineschanged': pronk(lineschanged),
               'children': pronk(invchildren),
               'fileschanged': pronk(fileschanged),
               'filesadded': pronk(filesadded),
               'linesinfilesadded': pronk(linesinfilesadded),
               'dirsadded': pronk(dirsadded),
               'filesremoved': pronk(filesremoved),
               'linelengths': pronk(linelengths),
               'parents': pronk(parents),
               'p1distance': pronk(p1distance),
               'p2distance': pronk(p2distance),
               'interarrival': pronk(interarrival),
               'tzoffset': pronk(tzoffset),
               },
              fp)
    fp.close()

@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number of files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        raise error.Abort('%s: %s' % (descpath, err[0].strerror))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError as err:
        raise error.Abort('%s: %s' % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    wlock = repo.wlock()
    lock = repo.lock()

    nevertouch = {'.hgsub', '.hgignore', '.hgtags'}

-    progress = ui.progress
    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')

    # Synthesize a single initial revision adding files to the repo according
    # to the modeled directory structure.
    initcount = int(opts['initfiles'])
    if initcount and initdirs:
        pctx = repo[None].parents()[0]
        dirs = set(pctx.dirs())
        files = {}

        def validpath(path):
            # Don't pick filenames which are already directory names.
            if path in dirs:
                return False
            # Don't pick directories which were used as file names.
            while path:
                if path in files:
                    return False
                path = os.path.dirname(path)
            return True

+        progress = ui.makeprogress(_synthesizing, unit=_files, total=initcount)
        for i in xrange(0, initcount):
-            ui.progress(_synthesizing, i, unit=_files, total=initcount)
+            progress.update(i)

            path = pickpath()
            while not validpath(path):
                path = pickpath()
            data = '%s contents\n' % path
            files[path] = data
            dir = os.path.dirname(path)
            while dir and dir not in dirs:
                dirs.add(dir)
                dir = os.path.dirname(dir)

        def filectxfn(repo, memctx, path):
            return context.memfilectx(repo, memctx, path, files[path])

-        ui.progress(_synthesizing, None)
+        progress.complete()
        message = 'synthesized wide repo with %d files' % (len(files),)
        mc = context.memctx(repo, [pctx.node(), nullid], message,
                            files, filectxfn, ui.username(),
                            '%d %d' % dateutil.makedate())
        initnode = mc.commit()
        if ui.debugflag:
            hexfn = hex
        else:
            hexfn = short
        ui.status(_('added commit %s with %d files\n')
                  % (hexfn(initnode), len(files)))

    # Synthesize incremental revisions to the repository, adding repo depth.
    count = int(opts['count'])
    heads = set(map(repo.changelog.rev, repo.heads()))
+    progress = ui.makeprogress(_synthesizing, unit=_changesets, total=count)
    for i in xrange(count):
-        progress(_synthesizing, i, unit=_changesets, total=count)
+        progress.update(i)

        node = repo.changelog.node
        revs = len(repo)

        def pickhead(heads, distance):
            if heads:
                lheads = sorted(heads)
                rev = revs - min(pick(distance), revs)
                if rev < lheads[-1]:
                    rev = lheads[bisect.bisect_left(lheads, rev)]
                else:
                    rev = lheads[-1]
                return rev, node(rev)
            return nullrev, nullid

        r1 = revs - min(pick(p1distance), revs)
        p1 = node(r1)

        # the number of heads will grow without bound if we use a pure
        # model, so artificially constrain their proliferation
        toomanyheads = len(heads) > random.randint(1, 20)
        if p2distance[0] and (pick(parents) == 2 or toomanyheads):
            r2, p2 = pickhead(heads.difference([r1]), p2distance)
        else:
            r2, p2 = nullrev, nullid

        pl = [p1, p2]
        pctx = repo[r1]
        mf = pctx.manifest()
        mfk = mf.keys()
        changes = {}
        if mfk:
            for __ in xrange(pick(fileschanged)):
                for __ in xrange(10):
                    fctx = pctx.filectx(random.choice(mfk))
                    path = fctx.path()
                    if not (path in nevertouch or fctx.isbinary() or
                            'l' in fctx.flags()):
                        break
                lines = fctx.data().splitlines()
                add, remove = pick(lineschanged)
                for __ in xrange(remove):
                    if not lines:
                        break
                    del lines[random.randrange(0, len(lines))]
                for __ in xrange(add):
                    lines.insert(random.randint(0, len(lines)), makeline())
                path = fctx.path()
                changes[path] = '\n'.join(lines) + '\n'
            for __ in xrange(pick(filesremoved)):
                path = random.choice(mfk)
                for __ in xrange(10):
                    path = random.choice(mfk)
                    if path not in changes:
                        break
        if filesadded:
            dirs = list(pctx.dirs())
            dirs.insert(0, '')
        for __ in xrange(pick(filesadded)):
            pathstr = ''
            while pathstr in dirs:
                path = [random.choice(dirs)]
                if pick(dirsadded):
                    path.append(random.choice(words))
                path.append(random.choice(words))
                pathstr = '/'.join(filter(None, path))
            data = '\n'.join(makeline()
                             for __ in xrange(pick(linesinfilesadded))) + '\n'
            changes[pathstr] = data
        def filectxfn(repo, memctx, path):
            if path not in changes:
                return None
            return context.memfilectx(repo, memctx, path, changes[path])
        if not changes:
            continue
        if revs:
            date = repo['tip'].date()[0] + pick(interarrival)
        else:
            date = time.time() - (86400 * count)
        # dates in mercurial must be positive, fit in 32-bit signed integers.
        date = min(0x7fffffff, max(0, date))
        user = random.choice(words) + '@' + random.choice(words)
        mc = context.memctx(repo, pl, makeline(minimum=2),
                            sorted(changes),
                            filectxfn, user, '%d %d' % (date, pick(tzoffset)))
        newnode = mc.commit()
        heads.add(repo.changelog.rev(newnode))
        heads.discard(r1)
        heads.discard(r2)

    lock.release()
    wlock.release()

def renamedirs(dirs, words):
    '''Randomly rename the directory names in the per-dir file count dict.'''
    wordgen = itertools.cycle(words)
    replacements = {'': ''}
    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        head, _ = os.path.split(dirpath)
        if head:
            head = rename(head)
        else:
            head = ''
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed
    result = []
    for dirpath, count in dirs.iteritems():
        result.append([rename(dirpath.lstrip(os.sep)), count])
    return result
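
For reference, the analyze/synthesize cycle documented in the docstrings above can be driven from Python through the hg command line. This is a hypothetical sketch: the repository names and parameter values are made up, and it assumes hg is on PATH and 'source-repo' is an existing repository to model.

    # Hypothetical driver for the documented analyze/synthesize workflow.
    import subprocess

    def hgrun(*args):
        # Enable the synthrepo extension for this invocation only.
        subprocess.check_call(['hg', '--config', 'extensions.synthrepo='] +
                              list(args))

    # Model an existing repository; writes a JSON description.
    hgrun('analyze', '-R', 'source-repo', '--output', 'model.json')

    # Seed a fresh repository with an initial file tree, then grow it.
    hgrun('init', 'synthetic-repo')
    hgrun('synthesize', '-R', 'synthetic-repo', '--initfiles', '1000',
          'model.json')
    hgrun('synthesize', '-R', 'synthetic-repo', '--count', '500', 'model.json')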