contrib/synthrepo: generate initial repo contents using directory shape model...
Mike Edgar
r22708:4c66e70c default
@@ -1,376 +1,442 @@
 # synthrepo.py - repo synthesis
 #
 # Copyright 2012 Facebook
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 '''synthesize structurally interesting change history
 
 This extension is useful for creating a repository with properties
 that are statistically similar to an existing repository. During
 analysis, a simple probability table is constructed from the history
 of an existing repository. During synthesis, these properties are
 reconstructed.
 
 Properties that are analyzed and synthesized include the following:
 
 - Lines added or removed when an existing file is modified
 - Number and sizes of files added
 - Number of files removed
 - Line lengths
 - Topological distance to parent changeset(s)
 - Probability of a commit being a merge
 - Probability of a newly added file being added to a new directory
 - Interarrival time, and time zone, of commits
 
 A few obvious properties that are not currently handled realistically:
 
 - Merges are treated as regular commits with two parents, which is not
   realistic
 - Modifications are not treated as operations on hunks of lines, but
   as insertions and deletions of randomly chosen single lines
 - Committer ID (always random)
 - Executability of files
 - Symlinks and binary files are ignored
 '''
 
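
The "simple probability table" described above boils down to frequency counts sampled through a cumulative distribution, which is what the cdf/pick helpers later in this file do. A minimal, self-contained sketch of that technique (the counts and variable names here are invented for illustration, not taken from the extension):

    import bisect, random

    # Hypothetical frequency table from analysis: value -> observation count,
    # e.g. files changed per commit.
    observed = {1: 60, 2: 25, 3: 10, 7: 5}

    # Build a cumulative distribution over the observed values...
    vals, cdfs = [], []
    total = float(sum(observed.values()))
    running = 0
    table = sorted(observed.items(), key=lambda x: x[1], reverse=True)
    for value, count in table:
        running += count
        vals.append(value)
        cdfs.append(running / total)

    # ...then sample it: values recur in roughly their observed proportions.
    sample = vals[bisect.bisect_left(cdfs, random.random())]
    print sample
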
-import bisect, collections, json, os, random, time, sys
+import bisect, collections, itertools, json, os, random, time, sys
 from mercurial import cmdutil, context, patch, scmutil, util, hg
 from mercurial.i18n import _
-from mercurial.node import nullrev, nullid
+from mercurial.node import nullrev, nullid, short
 
 testedwith = 'internal'
 
 cmdtable = {}
 command = cmdutil.command(cmdtable)
 
 newfile = set(('new fi', 'rename', 'copy f', 'copy t'))
 
 def zerodict():
     return collections.defaultdict(lambda: 0)
 
 def roundto(x, k):
     if x > k * 2:
         return int(round(x / float(k)) * k)
     return int(round(x))
 
 def parsegitdiff(lines):
     filename, mar, lineadd, lineremove = None, None, zerodict(), 0
     binary = False
     for line in lines:
         start = line[:6]
         if start == 'diff -':
             if filename:
                 yield filename, mar, lineadd, lineremove, binary
             mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
             filename = patch.gitre.match(line).group(1)
         elif start in newfile:
             mar = 'a'
         elif start == 'GIT bi':
             binary = True
         elif start == 'delete':
             mar = 'r'
         elif start:
             s = start[0]
             if s == '-' and not line.startswith('--- '):
                 lineremove += 1
             elif s == '+' and not line.startswith('+++ '):
                 lineadd[roundto(len(line) - 1, 5)] += 1
     if filename:
         yield filename, mar, lineadd, lineremove, binary
 
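
For a sense of what parsegitdiff yields, here is a hedged sketch; it assumes Mercurial's Python 2 modules are installed and this file is importable as synthrepo, which is not how the extension is normally invoked:

    import synthrepo

    # One added file containing a single line; 'mar' comes back as 'a'.
    sample = ['diff --git a/foo b/foo',
              'new file mode 100644',
              '--- /dev/null',
              '+++ b/foo',
              '@@ -0,0 +1,1 @@',
              '+hello world']

    for name, mar, lineadd, lineremove, binary in synthrepo.parsegitdiff(sample):
        # lineadd buckets added-line lengths via roundto: len('hello world')
        # is 11, which lands in the 10 bucket, so lineadd is {10: 1}.
        print name, mar, dict(lineadd), lineremove, binary
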
 @command('analyze',
          [('o', 'output', [], _('write output to given file'), _('FILE')),
           ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
          _('hg analyze'))
 def analyze(ui, repo, *revs, **opts):
     '''create a simple model of a repository to use for later synthesis
 
     This command examines every changeset in the given range (or all
     of history if none are specified) and creates a simple statistical
     model of the history of the repository.
 
     The model is written out to a JSON file, and can be used by
     :hg:`synthesize` to create or augment a repository with synthetic
     commits that have a structure that is statistically similar to the
     analyzed repository.
     '''
 
     revs = list(revs)
     revs.extend(opts['rev'])
     if not revs:
         revs = [':']
 
     output = opts['output']
     if not output:
         output = os.path.basename(repo.root) + '.json'
 
     if output == '-':
         fp = sys.stdout
     else:
         fp = open(output, 'w')
 
     revs = scmutil.revrange(repo, revs)
     revs.sort()
 
     lineschanged = zerodict()
     children = zerodict()
     p1distance = zerodict()
     p2distance = zerodict()
     linesinfilesadded = zerodict()
     fileschanged = zerodict()
     filesadded = zerodict()
     filesremoved = zerodict()
     linelengths = zerodict()
     interarrival = zerodict()
     parents = zerodict()
     dirsadded = zerodict()
     tzoffset = zerodict()
 
     progress = ui.progress
     _analyzing = _('analyzing')
     _changesets = _('changesets')
     _total = len(revs)
 
     for i, rev in enumerate(revs):
         progress(_analyzing, i, unit=_changesets, total=_total)
         ctx = repo[rev]
         pl = ctx.parents()
         pctx = pl[0]
         prev = pctx.rev()
         children[prev] += 1
         p1distance[rev - prev] += 1
         parents[len(pl)] += 1
         tzoffset[ctx.date()[1]] += 1
         if len(pl) > 1:
             p2distance[rev - pl[1].rev()] += 1
         if prev == rev - 1:
             lastctx = pctx
         else:
             lastctx = repo[rev - 1]
         if lastctx.rev() != nullrev:
             interarrival[roundto(ctx.date()[0] - lastctx.date()[0], 300)] += 1
         diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
         fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
         for filename, mar, lineadd, lineremove, binary in parsegitdiff(diff):
             if binary:
                 continue
             added = sum(lineadd.itervalues(), 0)
             if mar == 'm':
                 if added and lineremove:
                     lineschanged[roundto(added, 5), roundto(lineremove, 5)] += 1
                 filechanges += 1
             elif mar == 'a':
                 fileadds += 1
                 if '/' in filename:
                     filedir = filename.rsplit('/', 1)[0]
                     if filedir not in pctx.dirs():
                         diradds += 1
                 linesinfilesadded[roundto(added, 5)] += 1
             elif mar == 'r':
                 fileremoves += 1
             for length, count in lineadd.iteritems():
                 linelengths[length] += count
         fileschanged[filechanges] += 1
         filesadded[fileadds] += 1
         dirsadded[diradds] += 1
         filesremoved[fileremoves] += 1
 
     invchildren = zerodict()
 
     for rev, count in children.iteritems():
         invchildren[count] += 1
 
     if output != '-':
         ui.status(_('writing output to %s\n') % output)
 
     def pronk(d):
         return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
 
     json.dump({'revs': len(revs),
                'lineschanged': pronk(lineschanged),
                'children': pronk(invchildren),
                'fileschanged': pronk(fileschanged),
                'filesadded': pronk(filesadded),
                'linesinfilesadded': pronk(linesinfilesadded),
                'dirsadded': pronk(dirsadded),
                'filesremoved': pronk(filesremoved),
                'linelengths': pronk(linelengths),
                'parents': pronk(parents),
                'p1distance': pronk(p1distance),
                'p2distance': pronk(p2distance),
                'interarrival': pronk(interarrival),
                'tzoffset': pronk(tzoffset),
                },
               fp)
     fp.close()
 
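
The model file that analyze writes is plain JSON: each statistic is a list of [value, count] pairs sorted by descending frequency, as pronk does above. A quick way to inspect one (the file name and the sample counts in the comment are hypothetical):

    import json

    fp = open('myrepo.json')
    model = json.load(fp)
    fp.close()

    print model['revs']           # number of changesets analyzed
    print model['filesadded'][:3] # most common files-added-per-commit counts,
                                  # e.g. [[1, 120], [2, 30], [0, 10]]
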
 @command('synthesize',
          [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
-          ('', 'dict', '', _('path to a dictionary of words'), _('FILE'))],
+          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
+          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
          _('hg synthesize [OPTION].. DESCFILE'))
 def synthesize(ui, repo, descpath, **opts):
     '''synthesize commits based on a model of an existing repository
 
     The model must have been generated by :hg:`analyze`. Commits will
     be generated randomly according to the probabilities described in
-    the model.
+    the model. If --initfiles is set, the repository will be seeded with
+    the given number of files following the modeled repository's directory
+    structure.
 
     When synthesizing new content, commit descriptions, and user
     names, words will be chosen randomly from a dictionary that is
     presumed to contain one word per line. Use --dict to specify the
     path to an alternate dictionary to use.
     '''
     try:
         fp = hg.openpath(ui, descpath)
     except Exception, err:
         raise util.Abort('%s: %s' % (descpath, err[0].strerror))
     desc = json.load(fp)
     fp.close()
 
     def cdf(l):
         if not l:
             return [], []
         vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
         t = float(sum(probs, 0))
         s, cdfs = 0, []
         for v in probs:
             s += v
             cdfs.append(s / t)
         return vals, cdfs
 
     lineschanged = cdf(desc['lineschanged'])
     fileschanged = cdf(desc['fileschanged'])
     filesadded = cdf(desc['filesadded'])
     dirsadded = cdf(desc['dirsadded'])
     filesremoved = cdf(desc['filesremoved'])
     linelengths = cdf(desc['linelengths'])
     parents = cdf(desc['parents'])
     p1distance = cdf(desc['p1distance'])
     p2distance = cdf(desc['p2distance'])
     interarrival = cdf(desc['interarrival'])
     linesinfilesadded = cdf(desc['linesinfilesadded'])
     tzoffset = cdf(desc['tzoffset'])
 
     dictfile = opts.get('dict') or '/usr/share/dict/words'
     try:
         fp = open(dictfile, 'rU')
     except IOError, err:
         raise util.Abort('%s: %s' % (dictfile, err.strerror))
     words = fp.read().splitlines()
     fp.close()
 
+    initdirs = {}
+    if desc['initdirs']:
+        for k, v in desc['initdirs']:
+            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
+        initdirs = renamedirs(initdirs, words)
+    initdirscdf = cdf(initdirs)
+
     def pick(cdf):
         return cdf[0][bisect.bisect_left(cdf[1], random.random())]
 
+    def pickpath():
+        return os.path.join(pick(initdirscdf), random.choice(words))
+
     def makeline(minimum=0):
         total = max(minimum, pick(linelengths))
         c, l = 0, []
         while c < total:
             w = random.choice(words)
             c += len(w) + 1
             l.append(w)
         return ' '.join(l)
 
     wlock = repo.wlock()
     lock = repo.lock()
 
     nevertouch = set(('.hgsub', '.hgignore', '.hgtags'))
 
     progress = ui.progress
     _synthesizing = _('synthesizing')
+    _files = _('initial files')
     _changesets = _('changesets')
 
+    # Synthesize a single initial revision adding files to the repo according
+    # to the modeled directory structure.
+    initcount = int(opts['initfiles'])
+    if initcount and initdirs:
+        pctx = repo[None].parents()[0]
+        files = {}
+        for i in xrange(0, initcount):
+            ui.progress(_synthesizing, i, unit=_files, total=initcount)
+
+            path = pickpath()
+            while path in pctx.dirs():
+                path = pickpath()
+            data = '%s contents\n' % path
+            files[path] = context.memfilectx(repo, path, data)
+
+        def filectxfn(repo, memctx, path):
+            return files[path]
+
+        ui.progress(_synthesizing, None)
+        message = 'synthesized wide repo with %d files' % (len(files),)
+        mc = context.memctx(repo, [pctx.node(), nullid], message,
+                            files.iterkeys(), filectxfn, ui.username(),
+                            '%d %d' % util.makedate())
+        initnode = mc.commit()
+        hexfn = ui.debugflag and hex or short
+        ui.status(_('added commit %s with %d files\n')
+                  % (hexfn(initnode), len(files)))
+
+    # Synthesize incremental revisions to the repository, adding repo depth.
     count = int(opts['count'])
     heads = set(map(repo.changelog.rev, repo.heads()))
     for i in xrange(count):
         progress(_synthesizing, i, unit=_changesets, total=count)
 
         node = repo.changelog.node
         revs = len(repo)
 
         def pickhead(heads, distance):
             if heads:
                 lheads = sorted(heads)
                 rev = revs - min(pick(distance), revs)
                 if rev < lheads[-1]:
                     rev = lheads[bisect.bisect_left(lheads, rev)]
                 else:
                     rev = lheads[-1]
                 return rev, node(rev)
             return nullrev, nullid
 
         r1 = revs - min(pick(p1distance), revs)
         p1 = node(r1)
 
         # the number of heads will grow without bound if we use a pure
         # model, so artificially constrain their proliferation
         toomanyheads = len(heads) > random.randint(1, 20)
         if p2distance[0] and (pick(parents) == 2 or toomanyheads):
             r2, p2 = pickhead(heads.difference([r1]), p2distance)
         else:
             r2, p2 = nullrev, nullid
 
         pl = [p1, p2]
         pctx = repo[r1]
         mf = pctx.manifest()
         mfk = mf.keys()
         changes = {}
         if mfk:
             for __ in xrange(pick(fileschanged)):
                 for __ in xrange(10):
                     fctx = pctx.filectx(random.choice(mfk))
                     path = fctx.path()
                     if not (path in nevertouch or fctx.isbinary() or
                             'l' in fctx.flags()):
                         break
                 lines = fctx.data().splitlines()
                 add, remove = pick(lineschanged)
                 for __ in xrange(remove):
                     if not lines:
                         break
                     del lines[random.randrange(0, len(lines))]
                 for __ in xrange(add):
                     lines.insert(random.randint(0, len(lines)), makeline())
                 path = fctx.path()
                 changes[path] = context.memfilectx(repo, path,
                                                    '\n'.join(lines) + '\n')
             for __ in xrange(pick(filesremoved)):
                 path = random.choice(mfk)
                 for __ in xrange(10):
                     path = random.choice(mfk)
                     if path not in changes:
                         changes[path] = None
                         break
         if filesadded:
             dirs = list(pctx.dirs())
             dirs.append('')
         for __ in xrange(pick(filesadded)):
             path = [random.choice(dirs)]
             if pick(dirsadded):
                 path.append(random.choice(words))
             path.append(random.choice(words))
             path = '/'.join(filter(None, path))
             data = '\n'.join(makeline()
                              for __ in xrange(pick(linesinfilesadded))) + '\n'
             changes[path] = context.memfilectx(repo, path, data)
         def filectxfn(repo, memctx, path):
             return changes[path]
         if not changes:
             continue
         if revs:
             date = repo['tip'].date()[0] + pick(interarrival)
         else:
             date = time.time() - (86400 * count)
         user = random.choice(words) + '@' + random.choice(words)
         mc = context.memctx(repo, pl, makeline(minimum=2),
                             sorted(changes.iterkeys()),
                             filectxfn, user, '%d %d' % (date, pick(tzoffset)))
         newnode = mc.commit()
         heads.add(repo.changelog.rev(newnode))
         heads.discard(r1)
         heads.discard(r2)
 
     lock.release()
     wlock.release()
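
Putting the two commands together, a typical session might look like the following. The paths are illustrative, the extension must be enabled (for example via --config extensions.synthrepo=contrib/synthrepo.py), and --initfiles assumes the model carries the directory-shape data ('initdirs') that this commit consumes:

    $ cd existing-repo
    $ hg analyze                  # writes existing-repo.json to the cwd
    $ hg init ../synthetic
    $ cd ../synthetic
    $ hg synthesize --initfiles 1000 --count 200 \
          ../existing-repo/existing-repo.json
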
+
+def renamedirs(dirs, words):
+    '''Randomly rename the directory names in the per-dir file count dict.'''
+    wordgen = itertools.cycle(words)
+    replacements = {'': ''}
+    def rename(dirpath):
+        '''Recursively rename the directory and all path prefixes.
+
+        The mapping from path to renamed path is stored for all path prefixes
+        as in dynamic programming, ensuring linear runtime and consistent
+        renaming regardless of iteration order through the model.
+        '''
+        if dirpath in replacements:
+            return replacements[dirpath]
+        head, _ = os.path.split(dirpath)
+        head = head and rename(head) or ''
+        renamed = os.path.join(head, wordgen.next())
+        replacements[dirpath] = renamed
+        return renamed
+    result = []
+    for dirpath, count in dirs.iteritems():
+        result.append([rename(dirpath.lstrip(os.sep)), count])
+    return result
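
A hedged sketch of renamedirs in action (again assuming this file is importable as synthrepo under Python 2; the inputs are invented). Because rename() memoizes every path prefix in replacements, sibling directories keep a common renamed parent:

    import synthrepo

    counts = {'src/lib': 10, 'src/tests': 4, 'doc': 2}
    words = ['alpha', 'beta', 'gamma', 'delta']

    # Both 'src/*' entries share the same renamed 'src' component.
    print synthrepo.renamedirs(counts, words)
    # e.g. [['alpha/beta', 10], ['alpha/gamma', 4], ['delta', 2]]
    # (dict iteration order determines which words are consumed first)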