synthrepo: simply use the ui passed as a function argument
Yuya Nishihara
r38603:c6398fc2 default
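
The change itself is a one-liner in analyze(): the diff options are now built from the ui object that the command function already receives, instead of reaching through the changectx's private _repo attribute. Since ctx is obtained from repo[rev] inside the same function, the configuration read is the same either way; the function argument is simply the more direct, public route:

    # before: reach through a private attribute of the context
    diffopts = diffutil.diffopts(ctx._repo.ui, {'git': True})
    # after: use the ui already passed to analyze(ui, repo, ...)
    diffopts = diffutil.diffopts(ui, {'git': True})
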
@@ -1,521 +1,521 @@
# synthrepo.py - repo synthesis
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''synthesize structurally interesting change history

This extension is useful for creating a repository with properties
that are statistically similar to an existing repository. During
analysis, a simple probability table is constructed from the history
of an existing repository. During synthesis, these properties are
reconstructed.

Properties that are analyzed and synthesized include the following:

- Lines added or removed when an existing file is modified
- Number and sizes of files added
- Number of files removed
- Line lengths
- Topological distance to parent changeset(s)
- Probability of a commit being a merge
- Probability of a newly added file being added to a new directory
- Interarrival time, and time zone, of commits
- Number of files in each directory

A few obvious properties that are not currently handled realistically:

- Merges are treated as regular commits with two parents, which is not
  realistic
- Modifications are not treated as operations on hunks of lines, but
  as insertions and deletions of randomly chosen single lines
- Committer ID (always random)
- Executability of files
- Symlinks and binary files are ignored
'''

from __future__ import absolute_import
import bisect
import collections
import itertools
import json
import os
import random
import sys
import time

from mercurial.i18n import _
from mercurial.node import (
    nullid,
    nullrev,
    short,
)
from mercurial import (
    context,
    error,
    hg,
    patch,
    registrar,
    scmutil,
)
from mercurial.utils import (
    dateutil,
    diffutil,
)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

cmdtable = {}
command = registrar.command(cmdtable)

newfile = {'new fi', 'rename', 'copy f', 'copy t'}

def zerodict():
    return collections.defaultdict(lambda: 0)

def roundto(x, k):
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))

def parsegitdiff(lines):
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        start = line[:6]
        if start == 'diff -':
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            s = start[0]
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                lineadd[roundto(len(line) - 1, 5)] += 1
    if filename:
        yield filename, mar, lineadd, lineremove, binary

@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
    root = repo.root
    if not root.endswith(os.path.sep):
        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        output = os.path.basename(root) + '.json'

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    # Always obtain file counts of each directory in the given root directory.
    def onerror(e):
        ui.warn(_('error walking directory structure: %s\n') % e)

    dirs = {}
    rootprefixlen = len(root)
    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
        dirpathfromroot = dirpath[rootprefixlen:]
        dirs[dirpathfromroot] = len(filenames)
        if '.hg' in dirnames:
            dirnames.remove('.hg')

    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

    # If a mercurial repo is available, also model the commit history.
    if repo:
        revs = scmutil.revrange(repo, revs)
        revs.sort()

        progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
                                   total=len(revs))
        for i, rev in enumerate(revs):
            progress.update(i)
            ctx = repo[rev]
            pl = ctx.parents()
            pctx = pl[0]
            prev = pctx.rev()
            children[prev] += 1
            p1distance[rev - prev] += 1
            parents[len(pl)] += 1
            tzoffset[ctx.date()[1]] += 1
            if len(pl) > 1:
                p2distance[rev - pl[1].rev()] += 1
            if prev == rev - 1:
                lastctx = pctx
            else:
                lastctx = repo[rev - 1]
            if lastctx.rev() != nullrev:
                timedelta = ctx.date()[0] - lastctx.date()[0]
                interarrival[roundto(timedelta, 300)] += 1
-            diffopts = diffutil.diffopts(ctx._repo.ui, {'git': True})
+            diffopts = diffutil.diffopts(ui, {'git': True})
            diff = sum((d.splitlines()
                        for d in ctx.diff(pctx, opts=diffopts)), [])
            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                if isbin:
                    continue
                added = sum(lineadd.itervalues(), 0)
                if mar == 'm':
                    if added and lineremove:
                        lineschanged[roundto(added, 5),
                                     roundto(lineremove, 5)] += 1
                        filechanges += 1
                elif mar == 'a':
                    fileadds += 1
                    if '/' in filename:
                        filedir = filename.rsplit('/', 1)[0]
                        if filedir not in pctx.dirs():
                            diradds += 1
                    linesinfilesadded[roundto(added, 5)] += 1
                elif mar == 'r':
                    fileremoves += 1
                for length, count in lineadd.iteritems():
                    linelengths[length] += count
            fileschanged[filechanges] += 1
            filesadded[fileadds] += 1
            dirsadded[diradds] += 1
            filesremoved[fileremoves] += 1
        progress.complete()

    invchildren = zerodict()

    for rev, count in children.iteritems():
        invchildren[count] += 1

    if output != '-':
        ui.status(_('writing output to %s\n') % output)

    def pronk(d):
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    json.dump({'revs': len(revs),
               'initdirs': pronk(dirs),
               'lineschanged': pronk(lineschanged),
               'children': pronk(invchildren),
               'fileschanged': pronk(fileschanged),
               'filesadded': pronk(filesadded),
               'linesinfilesadded': pronk(linesinfilesadded),
               'dirsadded': pronk(dirsadded),
               'filesremoved': pronk(filesremoved),
               'linelengths': pronk(linelengths),
               'parents': pronk(parents),
               'p1distance': pronk(p1distance),
               'p2distance': pronk(p2distance),
               'interarrival': pronk(interarrival),
               'tzoffset': pronk(tzoffset),
               },
              fp)
    fp.close()

@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        raise error.Abort('%s: %s' % (descpath, err[0].strerror))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError as err:
        raise error.Abort('%s: %s' % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    wlock = repo.wlock()
    lock = repo.lock()

    nevertouch = {'.hgsub', '.hgignore', '.hgtags'}

    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')

    # Synthesize a single initial revision adding files to the repo according
    # to the modeled directory structure.
    initcount = int(opts['initfiles'])
    if initcount and initdirs:
        pctx = repo[None].parents()[0]
        dirs = set(pctx.dirs())
        files = {}

        def validpath(path):
            # Don't pick filenames which are already directory names.
            if path in dirs:
                return False
            # Don't pick directories which were used as file names.
            while path:
                if path in files:
                    return False
                path = os.path.dirname(path)
            return True

        progress = ui.makeprogress(_synthesizing, unit=_files, total=initcount)
        for i in xrange(0, initcount):
            progress.update(i)

            path = pickpath()
            while not validpath(path):
                path = pickpath()
            data = '%s contents\n' % path
            files[path] = data
            dir = os.path.dirname(path)
            while dir and dir not in dirs:
                dirs.add(dir)
                dir = os.path.dirname(dir)

        def filectxfn(repo, memctx, path):
            return context.memfilectx(repo, memctx, path, files[path])

        progress.complete()
        message = 'synthesized wide repo with %d files' % (len(files),)
        mc = context.memctx(repo, [pctx.node(), nullid], message,
                            files, filectxfn, ui.username(),
                            '%d %d' % dateutil.makedate())
        initnode = mc.commit()
        if ui.debugflag:
            hexfn = hex
        else:
            hexfn = short
        ui.status(_('added commit %s with %d files\n')
                  % (hexfn(initnode), len(files)))

    # Synthesize incremental revisions to the repository, adding repo depth.
    count = int(opts['count'])
    heads = set(map(repo.changelog.rev, repo.heads()))
    progress = ui.makeprogress(_synthesizing, unit=_changesets, total=count)
    for i in xrange(count):
        progress.update(i)

        node = repo.changelog.node
        revs = len(repo)

        def pickhead(heads, distance):
            if heads:
                lheads = sorted(heads)
                rev = revs - min(pick(distance), revs)
                if rev < lheads[-1]:
                    rev = lheads[bisect.bisect_left(lheads, rev)]
                else:
                    rev = lheads[-1]
                return rev, node(rev)
            return nullrev, nullid

        r1 = revs - min(pick(p1distance), revs)
        p1 = node(r1)

        # the number of heads will grow without bound if we use a pure
        # model, so artificially constrain their proliferation
        toomanyheads = len(heads) > random.randint(1, 20)
        if p2distance[0] and (pick(parents) == 2 or toomanyheads):
            r2, p2 = pickhead(heads.difference([r1]), p2distance)
        else:
            r2, p2 = nullrev, nullid

        pl = [p1, p2]
        pctx = repo[r1]
        mf = pctx.manifest()
        mfk = mf.keys()
        changes = {}
        if mfk:
            for __ in xrange(pick(fileschanged)):
                for __ in xrange(10):
                    fctx = pctx.filectx(random.choice(mfk))
                    path = fctx.path()
                    if not (path in nevertouch or fctx.isbinary() or
                            'l' in fctx.flags()):
                        break
                lines = fctx.data().splitlines()
                add, remove = pick(lineschanged)
                for __ in xrange(remove):
                    if not lines:
                        break
                    del lines[random.randrange(0, len(lines))]
                for __ in xrange(add):
                    lines.insert(random.randint(0, len(lines)), makeline())
                path = fctx.path()
                changes[path] = '\n'.join(lines) + '\n'
            for __ in xrange(pick(filesremoved)):
                path = random.choice(mfk)
                for __ in xrange(10):
                    path = random.choice(mfk)
                    if path not in changes:
                        break
        if filesadded:
            dirs = list(pctx.dirs())
            dirs.insert(0, '')
        for __ in xrange(pick(filesadded)):
            pathstr = ''
            while pathstr in dirs:
                path = [random.choice(dirs)]
                if pick(dirsadded):
                    path.append(random.choice(words))
                path.append(random.choice(words))
                pathstr = '/'.join(filter(None, path))
            data = '\n'.join(makeline()
                             for __ in xrange(pick(linesinfilesadded))) + '\n'
            changes[pathstr] = data
        def filectxfn(repo, memctx, path):
            if path not in changes:
                return None
            return context.memfilectx(repo, memctx, path, changes[path])
        if not changes:
            continue
        if revs:
            date = repo['tip'].date()[0] + pick(interarrival)
        else:
            date = time.time() - (86400 * count)
        # dates in mercurial must be positive, fit in 32-bit signed integers.
        date = min(0x7fffffff, max(0, date))
        user = random.choice(words) + '@' + random.choice(words)
        mc = context.memctx(repo, pl, makeline(minimum=2),
                            sorted(changes),
                            filectxfn, user, '%d %d' % (date, pick(tzoffset)))
        newnode = mc.commit()
        heads.add(repo.changelog.rev(newnode))
        heads.discard(r1)
        heads.discard(r2)
    progress.complete()

    lock.release()
    wlock.release()

def renamedirs(dirs, words):
    '''Randomly rename the directory names in the per-dir file count dict.'''
    wordgen = itertools.cycle(words)
    replacements = {'': ''}
    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        head, _ = os.path.split(dirpath)
        if head:
            head = rename(head)
        else:
            head = ''
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed
    result = []
    for dirpath, count in dirs.iteritems():
        result.append([rename(dirpath.lstrip(os.sep)), count])
    return result
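
The sampling machinery above is easy to lose in the listing: analyze stores each measured property as a list of (value, count) pairs, and synthesize turns each list into a cumulative distribution with cdf() and draws from it with pick(), a binary search over the cumulative weights. A minimal, self-contained sketch of that mechanism (written as Python 3 for convenience; the file above is Python 2, and the 'parents' table below is invented for illustration):

    import bisect
    import random

    def cdf(pairs):
        # Turn [(value, count), ...] into (values, cumulative probabilities),
        # most frequent values first, as the extension does.
        if not pairs:
            return [], []
        vals, counts = zip(*sorted(pairs, key=lambda x: x[1], reverse=True))
        total = float(sum(counts))
        running, cdfs = 0, []
        for c in counts:
            running += c
            cdfs.append(running / total)
        return vals, cdfs

    def pick(dist):
        # Draw a value with probability proportional to its count.
        vals, cdfs = dist
        return vals[bisect.bisect_left(cdfs, random.random())]

    # Hypothetical model table: 90% of commits had one parent, 10% had two.
    parents = cdf([(1, 90), (2, 10)])
    print(pick(parents))  # prints 1 about nine times in ten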
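
Similarly, renamedirs anonymizes the modeled directory tree while keeping its shape: every path prefix is renamed exactly once and the result memoized, so sibling directories keep a common renamed parent. A standalone sketch of the same memoization (Python 3, with invented inputs):

    import itertools
    import os

    def renamedirs(dirs, words):
        wordgen = itertools.cycle(words)
        replacements = {'': ''}  # memo table: original prefix -> renamed prefix
        def rename(dirpath):
            if dirpath in replacements:
                return replacements[dirpath]
            head, _tail = os.path.split(dirpath)
            head = rename(head) if head else ''
            renamed = os.path.join(head, next(wordgen))
            replacements[dirpath] = renamed
            return renamed
        return [[rename(d.lstrip(os.sep)), n] for d, n in dirs.items()]

    print(renamedirs({'src/core': 10, 'src/ui': 4}, ['ham', 'spam', 'eggs']))
    # [['ham/spam', 10], ['ham/eggs', 4]] -- both keep the shared parent 'ham'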
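
For reference, the end-to-end workflow the two docstrings describe, with illustrative paths (the extension is not enabled by default; something like --config extensions.synthrepo=path/to/synthrepo.py, or an [extensions] hgrc entry, is assumed):

    $ hg -R ~/src/somerepo analyze -o model.json
    $ hg init synthetic
    $ cd synthetic
    $ hg synthesize --initfiles 1000 model.json
    $ hg synthesize --count 500 model.json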