contrib/synthrepo: walk a repo's directory structure during analysis...
Mike Edgar
r22709:889789a2 default
@@ -1,442 +1,464 @@
# synthrepo.py - repo synthesis
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''synthesize structurally interesting change history

This extension is useful for creating a repository with properties
that are statistically similar to an existing repository. During
analysis, a simple probability table is constructed from the history
of an existing repository. During synthesis, these properties are
reconstructed.

Properties that are analyzed and synthesized include the following:

- Lines added or removed when an existing file is modified
- Number and sizes of files added
- Number of files removed
- Line lengths
- Topological distance to parent changeset(s)
- Probability of a commit being a merge
- Probability of a newly added file being added to a new directory
- Interarrival time, and time zone, of commits
+- Number of files in each directory

A few obvious properties that are not currently handled realistically:

- Merges are treated as regular commits with two parents, which is not
  realistic
- Modifications are not treated as operations on hunks of lines, but
  as insertions and deletions of randomly chosen single lines
- Committer ID (always random)
- Executability of files
- Symlinks and binary files are ignored
'''

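For orientation, the intended workflow pairs the two commands defined below. A sketch, with repository paths invented for illustration:

    hg -R some-existing-repo analyze -o model.json
    hg init synthetic
    hg -R synthetic synthesize --initfiles 1000 --count 500 model.json

With this change, `analyze` can also run outside a repository (note the optionalrepo flag below), in which case only the directory structure of the current directory is modeled.
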
import bisect, collections, itertools, json, os, random, time, sys
from mercurial import cmdutil, context, patch, scmutil, util, hg
from mercurial.i18n import _
from mercurial.node import hex, nullrev, nullid, short

testedwith = 'internal'

cmdtable = {}
command = cmdutil.command(cmdtable)

newfile = set(('new fi', 'rename', 'copy f', 'copy t'))

def zerodict():
    return collections.defaultdict(lambda: 0)

def roundto(x, k):
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))

def parsegitdiff(lines):
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        start = line[:6]
        if start == 'diff -':
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            s = start[0]
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                lineadd[roundto(len(line) - 1, 5)] += 1
    if filename:
        yield filename, mar, lineadd, lineremove, binary

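To make the yielded tuples concrete, here is a hypothetical single-file modification fed through parsegitdiff (the sample diff text is invented):

    sample = ['diff --git a/foo.py b/foo.py',
              '--- a/foo.py',
              '+++ b/foo.py',
              '@@ -1,1 +1,1 @@',
              '-old line',
              '+replacement line of text']
    for name, mar, lineadd, lineremove, binary in parsegitdiff(sample):
        # name == 'foo.py', mar == 'm' (modified), lineremove == 1,
        # lineadd == {25: 1}: one 24-character line added, bucketed
        # into multiples of 5 by roundto
        print name, mar, dict(lineadd), lineremove, binary
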
@command('analyze',
-         [('o', 'output', [], _('write output to given file'), _('FILE')),
+         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
-         _('hg analyze'))
+         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
-    model of the history of the repository.
+    model of the history of the repository. It also measures the directory
+    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
+    root = repo.root
+    if not root.endswith(os.path.sep):
+        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
-        output = os.path.basename(repo.root) + '.json'
+        output = os.path.basename(root) + '.json'

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

-    revs = scmutil.revrange(repo, revs)
-    revs.sort()
+    # Always obtain file counts of each directory in the given root directory.
+    def onerror(e):
+        ui.warn(_('error walking directory structure: %s\n') % e)
+
+    dirs = {}
+    rootprefixlen = len(root)
+    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
+        dirpathfromroot = dirpath[rootprefixlen:]
+        dirs[dirpathfromroot] = len(filenames)
+        if '.hg' in dirnames:
+            dirnames.remove('.hg')

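The walk records, for every directory keyed by its root-relative path, how many files it directly contains; removing '.hg' from dirnames prunes os.walk in place so the metadata area is never descended into. A sketch of the result:

    # For a checkout containing a/1.txt, a/b/2.txt and 3.txt, the
    # mapping built above would be (with '/' as the path separator):
    #   {'': 1, 'a': 1, 'a/b': 1}
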
    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

-    progress = ui.progress
-    _analyzing = _('analyzing')
-    _changesets = _('changesets')
-    _total = len(revs)
+    # If a mercurial repo is available, also model the commit history.
+    if repo:
+        revs = scmutil.revrange(repo, revs)
+        revs.sort()
+
+        progress = ui.progress
+        _analyzing = _('analyzing')
+        _changesets = _('changesets')
+        _total = len(revs)

-    for i, rev in enumerate(revs):
-        progress(_analyzing, i, unit=_changesets, total=_total)
-        ctx = repo[rev]
-        pl = ctx.parents()
-        pctx = pl[0]
-        prev = pctx.rev()
-        children[prev] += 1
-        p1distance[rev - prev] += 1
-        parents[len(pl)] += 1
-        tzoffset[ctx.date()[1]] += 1
-        if len(pl) > 1:
-            p2distance[rev - pl[1].rev()] += 1
-        if prev == rev - 1:
-            lastctx = pctx
-        else:
-            lastctx = repo[rev - 1]
-        if lastctx.rev() != nullrev:
-            interarrival[roundto(ctx.date()[0] - lastctx.date()[0], 300)] += 1
-        diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
-        fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
-        for filename, mar, lineadd, lineremove, binary in parsegitdiff(diff):
-            if binary:
-                continue
-            added = sum(lineadd.itervalues(), 0)
-            if mar == 'm':
-                if added and lineremove:
-                    lineschanged[roundto(added, 5), roundto(lineremove, 5)] += 1
-                    filechanges += 1
-            elif mar == 'a':
-                fileadds += 1
-                if '/' in filename:
-                    filedir = filename.rsplit('/', 1)[0]
-                    if filedir not in pctx.dirs():
-                        diradds += 1
-                linesinfilesadded[roundto(added, 5)] += 1
-            elif mar == 'r':
-                fileremoves += 1
-            for length, count in lineadd.iteritems():
-                linelengths[length] += count
-        fileschanged[filechanges] += 1
-        filesadded[fileadds] += 1
-        dirsadded[diradds] += 1
-        filesremoved[fileremoves] += 1
+        for i, rev in enumerate(revs):
+            progress(_analyzing, i, unit=_changesets, total=_total)
+            ctx = repo[rev]
+            pl = ctx.parents()
+            pctx = pl[0]
+            prev = pctx.rev()
+            children[prev] += 1
+            p1distance[rev - prev] += 1
+            parents[len(pl)] += 1
+            tzoffset[ctx.date()[1]] += 1
+            if len(pl) > 1:
+                p2distance[rev - pl[1].rev()] += 1
+            if prev == rev - 1:
+                lastctx = pctx
+            else:
+                lastctx = repo[rev - 1]
+            if lastctx.rev() != nullrev:
+                timedelta = ctx.date()[0] - lastctx.date()[0]
+                interarrival[roundto(timedelta, 300)] += 1
+            diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
+            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
+            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
+                if isbin:
+                    continue
+                added = sum(lineadd.itervalues(), 0)
+                if mar == 'm':
+                    if added and lineremove:
+                        lineschanged[roundto(added, 5),
+                                     roundto(lineremove, 5)] += 1
+                        filechanges += 1
+                elif mar == 'a':
+                    fileadds += 1
+                    if '/' in filename:
+                        filedir = filename.rsplit('/', 1)[0]
+                        if filedir not in pctx.dirs():
+                            diradds += 1
+                    linesinfilesadded[roundto(added, 5)] += 1
+                elif mar == 'r':
+                    fileremoves += 1
+                for length, count in lineadd.iteritems():
+                    linelengths[length] += count
+            fileschanged[filechanges] += 1
+            filesadded[fileadds] += 1
+            dirsadded[diradds] += 1
+            filesremoved[fileremoves] += 1

    invchildren = zerodict()

    for rev, count in children.iteritems():
        invchildren[count] += 1

    if output != '-':
        ui.status(_('writing output to %s\n') % output)

    def pronk(d):
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    json.dump({'revs': len(revs),
+               'initdirs': pronk(dirs),
               'lineschanged': pronk(lineschanged),
               'children': pronk(invchildren),
               'fileschanged': pronk(fileschanged),
               'filesadded': pronk(filesadded),
               'linesinfilesadded': pronk(linesinfilesadded),
               'dirsadded': pronk(dirsadded),
               'filesremoved': pronk(filesremoved),
               'linelengths': pronk(linelengths),
               'parents': pronk(parents),
               'p1distance': pronk(p1distance),
               'p2distance': pronk(p2distance),
               'interarrival': pronk(interarrival),
               'tzoffset': pronk(tzoffset),
               },
              fp)
    fp.close()

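The emitted model is a flat JSON object: 'revs' is a plain count, and every other key holds [value, count] pairs sorted by descending frequency (pronk). A hypothetical fragment, with invented numbers:

    {"revs": 1200,
     "initdirs": [["mercurial", 141], ["tests", 97], ...],
     "p1distance": [[1, 1100], [2, 60], ...],
     "lineschanged": [[[5, 5], 310], [[10, 0], 120], ...],
     ...}
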
@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number of files following the modeled repository's directory
    structure.

    New file content, commit descriptions, and user names are built
    from words chosen randomly from a dictionary that is presumed to
    contain one word per line. Use --dict to specify the path to an
    alternate dictionary.
    '''
    try:
        fp = hg.openpath(ui, descpath)
    except Exception, err:
        raise util.Abort('%s: %s' % (descpath, err.strerror))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError, err:
        raise util.Abort('%s: %s' % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

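Together, cdf and pick implement weighted sampling from the model: cdf sorts the observed (value, count) pairs by descending count and turns the counts into a cumulative distribution, and pick draws a uniform random number and bisects into it. A worked example with a toy table:

    vals, cdfs = cdf([(10, 1), (20, 3)])
    # vals == (20, 10), cdfs == [0.75, 1.0]
    # pick((vals, cdfs)) returns 20 with probability 3/4 and 10 with
    # probability 1/4, mirroring the observed counts.
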
    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

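makeline appends random dictionary words until the running total of len(word) + 1 reaches a target drawn from the modeled line-length distribution, so synthesized lines mirror the analyzed repository's line lengths. For example:

    # With a 30-character target, makeline() might return
    # 'salamander ultramarine quixotic': it stops at the first word
    # that pushes the running total past the target.
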
    wlock = repo.wlock()
    lock = repo.lock()

    nevertouch = set(('.hgsub', '.hgignore', '.hgtags'))

    progress = ui.progress
    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')

    # Synthesize a single initial revision adding files to the repo according
    # to the modeled directory structure.
    initcount = int(opts['initfiles'])
    if initcount and initdirs:
        pctx = repo[None].parents()[0]
        files = {}
        for i in xrange(0, initcount):
            ui.progress(_synthesizing, i, unit=_files, total=initcount)

            path = pickpath()
            while path in pctx.dirs():
                path = pickpath()
            data = '%s contents\n' % path
            files[path] = context.memfilectx(repo, path, data)

        def filectxfn(repo, memctx, path):
            return files[path]

        ui.progress(_synthesizing, None)
        message = 'synthesized wide repo with %d files' % (len(files),)
        mc = context.memctx(repo, [pctx.node(), nullid], message,
                            files.iterkeys(), filectxfn, ui.username(),
                            '%d %d' % util.makedate())
        initnode = mc.commit()
        hexfn = ui.debugflag and hex or short
        ui.status(_('added commit %s with %d files\n')
                  % (hexfn(initnode), len(files)))

    # Synthesize incremental revisions to the repository, adding repo depth.
    count = int(opts['count'])
    heads = set(map(repo.changelog.rev, repo.heads()))
    for i in xrange(count):
        progress(_synthesizing, i, unit=_changesets, total=count)

        node = repo.changelog.node
        revs = len(repo)

        def pickhead(heads, distance):
            if heads:
                lheads = sorted(heads)
                rev = revs - min(pick(distance), revs)
                if rev < lheads[-1]:
                    rev = lheads[bisect.bisect_left(lheads, rev)]
                else:
                    rev = lheads[-1]
                return rev, node(rev)
            return nullrev, nullid

        r1 = revs - min(pick(p1distance), revs)
        p1 = node(r1)

        # the number of heads will grow without bound if we use a pure
        # model, so artificially constrain their proliferation
        toomanyheads = len(heads) > random.randint(1, 20)
        if p2distance[0] and (pick(parents) == 2 or toomanyheads):
            r2, p2 = pickhead(heads.difference([r1]), p2distance)
        else:
            r2, p2 = nullrev, nullid

        pl = [p1, p2]
        pctx = repo[r1]
        mf = pctx.manifest()
        mfk = mf.keys()
        changes = {}
        if mfk:
            for __ in xrange(pick(fileschanged)):
                for __ in xrange(10):
                    fctx = pctx.filectx(random.choice(mfk))
                    path = fctx.path()
                    if not (path in nevertouch or fctx.isbinary() or
                            'l' in fctx.flags()):
                        break
                lines = fctx.data().splitlines()
                add, remove = pick(lineschanged)
                for __ in xrange(remove):
                    if not lines:
                        break
                    del lines[random.randrange(0, len(lines))]
                for __ in xrange(add):
                    lines.insert(random.randint(0, len(lines)), makeline())
                path = fctx.path()
                changes[path] = context.memfilectx(repo, path,
                                                   '\n'.join(lines) + '\n')
            for __ in xrange(pick(filesremoved)):
                path = random.choice(mfk)
                for __ in xrange(10):
                    path = random.choice(mfk)
                    if path not in changes:
                        changes[path] = None
                        break
        if filesadded:
            dirs = list(pctx.dirs())
            dirs.append('')
        for __ in xrange(pick(filesadded)):
            path = [random.choice(dirs)]
            if pick(dirsadded):
                path.append(random.choice(words))
            path.append(random.choice(words))
            path = '/'.join(filter(None, path))
            data = '\n'.join(makeline()
                             for __ in xrange(pick(linesinfilesadded))) + '\n'
            changes[path] = context.memfilectx(repo, path, data)
        def filectxfn(repo, memctx, path):
            return changes[path]
        if not changes:
            continue
        if revs:
            date = repo['tip'].date()[0] + pick(interarrival)
        else:
            date = time.time() - (86400 * count)
        user = random.choice(words) + '@' + random.choice(words)
        mc = context.memctx(repo, pl, makeline(minimum=2),
                            sorted(changes.iterkeys()),
                            filectxfn, user, '%d %d' % (date, pick(tzoffset)))
        newnode = mc.commit()
        heads.add(repo.changelog.rev(newnode))
        heads.discard(r1)
        heads.discard(r2)

    lock.release()
    wlock.release()

def renamedirs(dirs, words):
    '''Randomly rename the directory names in the per-dir file count dict.'''
    wordgen = itertools.cycle(words)
    replacements = {'': ''}
    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        head, _ = os.path.split(dirpath)
        head = head and rename(head) or ''
        renamed = os.path.join(head, wordgen.next())
        replacements[dirpath] = renamed
        return renamed
    result = []
    for dirpath, count in dirs.iteritems():
        result.append([rename(dirpath.lstrip(os.sep)), count])
    return result
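
renamedirs memoizes every renamed path prefix, so directories that share a parent rename consistently regardless of iteration order. A sketch, assuming the word cycle starts 'alpha', 'beta':

    # renamedirs({'src': 2, 'src/lib': 3}, words) might produce
    #   [['alpha', 2], ['alpha/beta', 3]]
    # 'src' is renamed once to 'alpha'; 'src/lib' reuses that prefix
    # because replacements['src'] is consulted before wordgen advances.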