##// END OF EJS Templates
synthrepo: new filenames must not also be new directories, and vice-versa...
Mike Edgar -
r23778:a5dbec25 default
parent child Browse files
Show More
@@ -1,468 +1,485 b''
1 # synthrepo.py - repo synthesis
1 # synthrepo.py - repo synthesis
2 #
2 #
3 # Copyright 2012 Facebook
3 # Copyright 2012 Facebook
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 '''synthesize structurally interesting change history
8 '''synthesize structurally interesting change history
9
9
10 This extension is useful for creating a repository with properties
10 This extension is useful for creating a repository with properties
11 that are statistically similar to an existing repository. During
11 that are statistically similar to an existing repository. During
12 analysis, a simple probability table is constructed from the history
12 analysis, a simple probability table is constructed from the history
13 of an existing repository. During synthesis, these properties are
13 of an existing repository. During synthesis, these properties are
14 reconstructed.
14 reconstructed.
15
15
16 Properties that are analyzed and synthesized include the following:
16 Properties that are analyzed and synthesized include the following:
17
17
18 - Lines added or removed when an existing file is modified
18 - Lines added or removed when an existing file is modified
19 - Number and sizes of files added
19 - Number and sizes of files added
20 - Number of files removed
20 - Number of files removed
21 - Line lengths
21 - Line lengths
22 - Topological distance to parent changeset(s)
22 - Topological distance to parent changeset(s)
23 - Probability of a commit being a merge
23 - Probability of a commit being a merge
24 - Probability of a newly added file being added to a new directory
24 - Probability of a newly added file being added to a new directory
25 - Interarrival time, and time zone, of commits
25 - Interarrival time, and time zone, of commits
26 - Number of files in each directory
26 - Number of files in each directory
27
27
28 A few obvious properties that are not currently handled realistically:
28 A few obvious properties that are not currently handled realistically:
29
29
30 - Merges are treated as regular commits with two parents, which is not
30 - Merges are treated as regular commits with two parents, which is not
31 realistic
31 realistic
32 - Modifications are not treated as operations on hunks of lines, but
32 - Modifications are not treated as operations on hunks of lines, but
33 as insertions and deletions of randomly chosen single lines
33 as insertions and deletions of randomly chosen single lines
34 - Committer ID (always random)
34 - Committer ID (always random)
35 - Executability of files
35 - Executability of files
36 - Symlinks and binary files are ignored
36 - Symlinks and binary files are ignored
37 '''
37 '''
38
38
39 import bisect, collections, itertools, json, os, random, time, sys
39 import bisect, collections, itertools, json, os, random, time, sys
40 from mercurial import cmdutil, context, patch, scmutil, util, hg
40 from mercurial import cmdutil, context, patch, scmutil, util, hg
41 from mercurial.i18n import _
41 from mercurial.i18n import _
42 from mercurial.node import nullrev, nullid, short
42 from mercurial.node import nullrev, nullid, short
43
43
# 'internal' marks this extension as distributed and tested with Mercurial
# itself rather than as a third-party extension.
testedwith = 'internal'

# Command table filled in by the @command decorator applied to the
# 'analyze' and 'synthesize' functions below.
cmdtable = {}
command = cmdutil.command(cmdtable)

# First six characters of the git-style diff header lines that introduce a
# file under a new name (add, rename source/target, copy source/target);
# matched against line prefixes in parsegitdiff below.
newfile = set(('new fi', 'rename', 'copy f', 'copy t'))
50
50
def zerodict():
    '''Return a counting dict: reading a missing key yields 0.'''
    # int() produces 0, equivalent to the lambda-based default factory.
    return collections.defaultdict(int)
53
53
def roundto(x, k):
    '''Round x to the nearest multiple of k, once x exceeds two buckets.

    Values no larger than 2*k are kept at full (integer) precision so that
    small measurements are not collapsed into a single bucket.
    '''
    if x <= k * 2:
        return int(round(x))
    return int(round(x / float(k)) * k)
58
58
def parsegitdiff(lines):
    '''Parse a git-style diff, yielding one summary tuple per file.

    Each tuple is (filename, mar, lineadd, lineremove, binary) where mar is
    'm' (modified), 'a' (added/renamed/copied) or 'r' (removed), lineadd
    maps bucketed added-line lengths to counts, lineremove is the number of
    removed lines, and binary flags a "GIT binary patch" section.
    '''
    fname = None
    mar = None
    added = zerodict()
    removed = 0
    isbinary = False
    for line in lines:
        prefix = line[:6]
        if prefix == 'diff -':
            # New file header: emit the previous file, reset the counters.
            if fname:
                yield fname, mar, added, removed, isbinary
            mar = 'm'
            added = zerodict()
            removed = 0
            isbinary = False
            fname = patch.gitre.match(line).group(1)
        elif prefix in newfile:
            mar = 'a'
        elif prefix == 'GIT bi':
            isbinary = True
        elif prefix == 'delete':
            mar = 'r'
        elif prefix:
            first = prefix[0]
            # Skip the '---'/'+++' file-name lines; count hunk body lines.
            if first == '-' and not line.startswith('--- '):
                removed += 1
            elif first == '+' and not line.startswith('+++ '):
                # Bucket added lines by length (minus the '+' marker).
                added[roundto(len(line) - 1, 5)] += 1
    if fname:
        yield fname, mar, added, removed, isbinary
83
83
@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
    # Normalize the root so slicing below strips a full path prefix.
    root = repo.root
    if not root.endswith(os.path.sep):
        root += os.path.sep

    # Revisions may come positionally or via -r; default to all of history.
    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    # Default output file name is derived from the repo directory name;
    # '-' means write the JSON model to stdout.
    output = opts['output']
    if not output:
        output = os.path.basename(root) + '.json'

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    # Always obtain file counts of each directory in the given root directory.
    def onerror(e):
        ui.warn(_('error walking directory structure: %s\n') % e)

    dirs = {}
    rootprefixlen = len(root)
    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
        dirpathfromroot = dirpath[rootprefixlen:]
        dirs[dirpathfromroot] = len(filenames)
        # Prune the .hg metadata directory from the walk; os.walk honors
        # in-place edits of dirnames.
        if '.hg' in dirnames:
            dirnames.remove('.hg')

    # Frequency tables, each keyed by the measured quantity and mapping to
    # the number of times it was observed.
    lineschanged = zerodict()        # (added, removed) line-count pairs
    children = zerodict()            # child count per parent revision
    p1distance = zerodict()          # rev distance to first parent
    p2distance = zerodict()          # rev distance to second parent
    linesinfilesadded = zerodict()   # bucketed size of newly added files
    fileschanged = zerodict()        # files modified per commit
    filesadded = zerodict()          # files added per commit
    filesremoved = zerodict()        # files removed per commit
    linelengths = zerodict()         # bucketed lengths of added lines
    interarrival = zerodict()        # bucketed seconds between commits
    parents = zerodict()             # parent count per commit (merge freq.)
    dirsadded = zerodict()           # new directories introduced per commit
    tzoffset = zerodict()            # committer timezone offsets

    # If a mercurial repo is available, also model the commit history.
    if repo:
        revs = scmutil.revrange(repo, revs)
        revs.sort()

        progress = ui.progress
        _analyzing = _('analyzing')
        _changesets = _('changesets')
        _total = len(revs)

        for i, rev in enumerate(revs):
            progress(_analyzing, i, unit=_changesets, total=_total)
            ctx = repo[rev]
            pl = ctx.parents()
            pctx = pl[0]
            prev = pctx.rev()
            children[prev] += 1
            p1distance[rev - prev] += 1
            parents[len(pl)] += 1
            tzoffset[ctx.date()[1]] += 1
            if len(pl) > 1:
                p2distance[rev - pl[1].rev()] += 1
            # Interarrival time is measured against the previous revision in
            # numeric order, reusing pctx when it happens to be rev - 1.
            if prev == rev - 1:
                lastctx = pctx
            else:
                lastctx = repo[rev - 1]
            if lastctx.rev() != nullrev:
                timedelta = ctx.date()[0] - lastctx.date()[0]
                interarrival[roundto(timedelta, 300)] += 1
            # Flatten the git diff against the first parent into lines and
            # accumulate per-file statistics from it.
            diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                if isbin:
                    continue
                added = sum(lineadd.itervalues(), 0)
                if mar == 'm':
                    if added and lineremove:
                        lineschanged[roundto(added, 5),
                                     roundto(lineremove, 5)] += 1
                    filechanges += 1
                elif mar == 'a':
                    fileadds += 1
                    # Count an added directory when the new file's directory
                    # did not exist in the parent changeset.
                    if '/' in filename:
                        filedir = filename.rsplit('/', 1)[0]
                        if filedir not in pctx.dirs():
                            diradds += 1
                    linesinfilesadded[roundto(added, 5)] += 1
                elif mar == 'r':
                    fileremoves += 1
                for length, count in lineadd.iteritems():
                    linelengths[length] += count
            fileschanged[filechanges] += 1
            filesadded[fileadds] += 1
            dirsadded[diradds] += 1
            filesremoved[fileremoves] += 1

    # Invert the children table: count of revisions per child count.
    invchildren = zerodict()

    for rev, count in children.iteritems():
        invchildren[count] += 1

    if output != '-':
        ui.status(_('writing output to %s\n') % output)

    def pronk(d):
        # Serialize a frequency dict as (value, count) pairs, most common
        # first, so cdf() in synthesize can consume it directly.
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    json.dump({'revs': len(revs),
               'initdirs': pronk(dirs),
               'lineschanged': pronk(lineschanged),
               'children': pronk(invchildren),
               'fileschanged': pronk(fileschanged),
               'filesadded': pronk(filesadded),
               'linesinfilesadded': pronk(linesinfilesadded),
               'dirsadded': pronk(dirsadded),
               'filesremoved': pronk(filesremoved),
               'linelengths': pronk(linelengths),
               'parents': pronk(parents),
               'p1distance': pronk(p1distance),
               'p2distance': pronk(p2distance),
               'interarrival': pronk(interarrival),
               'tzoffset': pronk(tzoffset),
               },
              fp)
    fp.close()
230
230
@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    try:
        fp = hg.openpath(ui, descpath)
    except Exception, err:
        # NOTE(review): err[0].strerror assumes an errno-tuple style
        # exception argument -- verify against hg.openpath's failure modes.
        raise util.Abort('%s: %s' % (descpath, err[0].strerror))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        # Convert (value, count) pairs from the model into a pair of
        # parallel lists: values and their cumulative distribution, for
        # sampling with bisect in pick() below.
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    # Build samplers for every distribution recorded by 'hg analyze'.
    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    # Word list used for file contents, paths, user names and descriptions.
    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError, err:
        raise util.Abort('%s: %s' % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    # Rename the modeled directories to dictionary words, neutralizing any
    # '.hg' path components so they cannot collide with repo metadata.
    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        # Sample one value from a (values, cumulative-probabilities) pair.
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        # Build a random line of roughly the sampled length from words.
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    wlock = repo.wlock()
    lock = repo.lock()

    nevertouch = set(('.hgsub', '.hgignore', '.hgtags'))

    progress = ui.progress
    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')

    # Synthesize a single initial revision adding files to the repo according
    # to the modeled directory structure.
    initcount = int(opts['initfiles'])
    if initcount and initdirs:
        pctx = repo[None].parents()[0]
        dirs = set(pctx.dirs())
        files = {}

        def validpath(path):
            # Don't pick filenames which are already directory names.
            if path in dirs:
                return False
            # Don't pick directories which were used as file names.
            while path:
                if path in files:
                    return False
                path = os.path.dirname(path)
            return True

        for i in xrange(0, initcount):
            ui.progress(_synthesizing, i, unit=_files, total=initcount)

            # Resample until the path collides with neither an existing
            # directory nor a previously chosen file name.
            path = pickpath()
            while not validpath(path):
                path = pickpath()
            data = '%s contents\n' % path
            files[path] = context.memfilectx(repo, path, data)
            # Record every new ancestor directory of the chosen file so
            # later picks cannot reuse it as a file name.
            dir = os.path.dirname(path)
            while dir and dir not in dirs:
                dirs.add(dir)
                dir = os.path.dirname(dir)

        def filectxfn(repo, memctx, path):
            return files[path]

        ui.progress(_synthesizing, None)
        message = 'synthesized wide repo with %d files' % (len(files),)
        mc = context.memctx(repo, [pctx.node(), nullid], message,
                            files.iterkeys(), filectxfn, ui.username(),
                            '%d %d' % util.makedate())
        initnode = mc.commit()
        hexfn = ui.debugflag and hex or short
        ui.status(_('added commit %s with %d files\n')
                  % (hexfn(initnode), len(files)))

    # Synthesize incremental revisions to the repository, adding repo depth.
    count = int(opts['count'])
    heads = set(map(repo.changelog.rev, repo.heads()))
    for i in xrange(count):
        progress(_synthesizing, i, unit=_changesets, total=count)

        node = repo.changelog.node
        revs = len(repo)

        def pickhead(heads, distance):
            # Choose an existing head at approximately the sampled
            # topological distance; fall back to the null revision when
            # there are no candidate heads.
            if heads:
                lheads = sorted(heads)
                rev = revs - min(pick(distance), revs)
                if rev < lheads[-1]:
                    rev = lheads[bisect.bisect_left(lheads, rev)]
                else:
                    rev = lheads[-1]
                return rev, node(rev)
            return nullrev, nullid

        r1 = revs - min(pick(p1distance), revs)
        p1 = node(r1)

        # the number of heads will grow without bound if we use a pure
        # model, so artificially constrain their proliferation
        toomanyheads = len(heads) > random.randint(1, 20)
        if p2distance[0] and (pick(parents) == 2 or toomanyheads):
            r2, p2 = pickhead(heads.difference([r1]), p2distance)
        else:
            r2, p2 = nullrev, nullid

        pl = [p1, p2]
        pctx = repo[r1]
        mf = pctx.manifest()
        mfk = mf.keys()
        changes = {}
        if mfk:
            # Modify a sampled number of existing files, retrying up to 10
            # times to avoid binary files, symlinks and hg metadata files.
            for __ in xrange(pick(fileschanged)):
                for __ in xrange(10):
                    fctx = pctx.filectx(random.choice(mfk))
                    path = fctx.path()
                    if not (path in nevertouch or fctx.isbinary() or
                            'l' in fctx.flags()):
                        break
                lines = fctx.data().splitlines()
                add, remove = pick(lineschanged)
                for __ in xrange(remove):
                    if not lines:
                        break
                    del lines[random.randrange(0, len(lines))]
                for __ in xrange(add):
                    lines.insert(random.randint(0, len(lines)), makeline())
                path = fctx.path()
                changes[path] = context.memfilectx(repo, path,
                                                   '\n'.join(lines) + '\n')
            # Remove a sampled number of files (None marks a removal for
            # memctx); retry to avoid files already changed above.
            for __ in xrange(pick(filesremoved)):
                path = random.choice(mfk)
                for __ in xrange(10):
                    path = random.choice(mfk)
                    if path not in changes:
                        changes[path] = None
                        break
        if filesadded:
            dirs = list(pctx.dirs())
            dirs.insert(0, '')
        # Add a sampled number of new files, rejecting any generated path
        # that names an existing directory.
        for __ in xrange(pick(filesadded)):
            pathstr = ''
            while pathstr in dirs:
                path = [random.choice(dirs)]
                if pick(dirsadded):
                    path.append(random.choice(words))
                path.append(random.choice(words))
                pathstr = '/'.join(filter(None, path))
            data = '\n'.join(makeline()
                             for __ in xrange(pick(linesinfilesadded))) + '\n'
            changes[pathstr] = context.memfilectx(repo, pathstr, data)
        def filectxfn(repo, memctx, path):
            return changes[path]
        if not changes:
            continue
        if revs:
            date = repo['tip'].date()[0] + pick(interarrival)
        else:
            date = time.time() - (86400 * count)
        # dates in mercurial must be positive, fit in 32-bit signed integers.
        date = min(0x7fffffff, max(0, date))
        user = random.choice(words) + '@' + random.choice(words)
        mc = context.memctx(repo, pl, makeline(minimum=2),
                            sorted(changes.iterkeys()),
                            filectxfn, user, '%d %d' % (date, pick(tzoffset)))
        newnode = mc.commit()
        # The new commit becomes a head; its parents no longer are.
        heads.add(repo.changelog.rev(newnode))
        heads.discard(r1)
        heads.discard(r2)

    lock.release()
    wlock.release()
446
463
def renamedirs(dirs, words):
    '''Randomly rename the directory names in the per-dir file count dict.

    Returns a list of [renamed_path, count] pairs, with path components
    replaced by words drawn cyclically from the given word list.
    '''
    wordgen = itertools.cycle(words)
    replacements = {'': ''}
    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        # Name the tail explicitly (it is unused) rather than '_', which
        # would shadow the gettext helper imported at module level.
        head, tail = os.path.split(dirpath)
        head = head and rename(head) or ''
        # next(wordgen) instead of wordgen.next(): the builtin works on
        # Python 2.6+ and 3.x alike (PEP 3114).
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed
    result = []
    # items() (not the 2.x-only iteritems()) keeps this portable.
    for dirpath, count in dirs.items():
        result.append([rename(dirpath.lstrip(os.sep)), count])
    return result
General Comments 0
You need to be logged in to leave comments. Login now