synthrepo: close progress topics...
Martin von Zweigbergk
r38428:ce65c25d default
@@ -1,514 +1,516 @@
# synthrepo.py - repo synthesis
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

'''synthesize structurally interesting change history

This extension is useful for creating a repository with properties
that are statistically similar to an existing repository. During
analysis, a simple probability table is constructed from the history
of an existing repository. During synthesis, these properties are
reconstructed.

Properties that are analyzed and synthesized include the following:

- Lines added or removed when an existing file is modified
- Number and sizes of files added
- Number of files removed
- Line lengths
- Topological distance to parent changeset(s)
- Probability of a commit being a merge
- Probability of a newly added file being added to a new directory
- Interarrival time, and time zone, of commits
- Number of files in each directory

A few obvious properties that are not currently handled realistically:

- Merges are treated as regular commits with two parents, which is not
  realistic
- Modifications are not treated as operations on hunks of lines, but
  as insertions and deletions of randomly chosen single lines
- Committer ID (always random)
- Executability of files
- Symlinks and binary files are ignored
'''
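
# An illustrative session (paths and counts are assumptions, not taken from
# this page; the extension must be enabled in the usual way, e.g. via
# --config extensions.synthrepo=/path/to/synthrepo.py):
#
#   $ hg analyze -o model.json            # run inside the repo to be modeled
#   $ hg init synthetic && cd synthetic
#   $ hg synthesize --count 100 --initfiles 500 model.json
#
# 'analyze' writes the JSON model (by default <repo-basename>.json);
# 'synthesize' then replays commits whose statistics follow that model.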
38
38
39 from __future__ import absolute_import
39 from __future__ import absolute_import
40 import bisect
40 import bisect
41 import collections
41 import collections
42 import itertools
42 import itertools
43 import json
43 import json
44 import os
44 import os
45 import random
45 import random
46 import sys
46 import sys
47 import time
47 import time
48
48
49 from mercurial.i18n import _
49 from mercurial.i18n import _
50 from mercurial.node import (
50 from mercurial.node import (
51 nullid,
51 nullid,
52 nullrev,
52 nullrev,
53 short,
53 short,
54 )
54 )
55 from mercurial import (
55 from mercurial import (
56 context,
56 context,
57 error,
57 error,
58 hg,
58 hg,
59 patch,
59 patch,
60 registrar,
60 registrar,
61 scmutil,
61 scmutil,
62 )
62 )
63 from mercurial.utils import dateutil
63 from mercurial.utils import dateutil
64
64
65 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
65 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
66 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
66 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
67 # be specifying the version(s) of Mercurial they are tested with, or
67 # be specifying the version(s) of Mercurial they are tested with, or
68 # leave the attribute unspecified.
68 # leave the attribute unspecified.
69 testedwith = 'ships-with-hg-core'
69 testedwith = 'ships-with-hg-core'
70
70
71 cmdtable = {}
71 cmdtable = {}
72 command = registrar.command(cmdtable)
72 command = registrar.command(cmdtable)
73
73
74 newfile = {'new fi', 'rename', 'copy f', 'copy t'}
74 newfile = {'new fi', 'rename', 'copy f', 'copy t'}
75
75
def zerodict():
    return collections.defaultdict(lambda: 0)

def roundto(x, k):
    if x > k * 2:
        return int(round(x / float(k)) * k)
    return int(round(x))
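
# Illustrative behavior of the two helpers above (values worked out by hand):
#
#   >>> d = zerodict(); d['missing'] += 1   # absent keys default to 0
#   >>> roundto(23, 5)    # 23 > 2*5, so round to the nearest multiple of 5
#   25
#   >>> roundto(8, 5)     # 8 <= 2*5, so small values keep full resolution
#   8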

def parsegitdiff(lines):
    filename, mar, lineadd, lineremove = None, None, zerodict(), 0
    binary = False
    for line in lines:
        start = line[:6]
        if start == 'diff -':
            if filename:
                yield filename, mar, lineadd, lineremove, binary
            mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
            filename = patch.gitre.match(line).group(1)
        elif start in newfile:
            mar = 'a'
        elif start == 'GIT bi':
            binary = True
        elif start == 'delete':
            mar = 'r'
        elif start:
            s = start[0]
            if s == '-' and not line.startswith('--- '):
                lineremove += 1
            elif s == '+' and not line.startswith('+++ '):
                lineadd[roundto(len(line) - 1, 5)] += 1
    if filename:
        yield filename, mar, lineadd, lineremove, binary
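
# parsegitdiff classifies each file by the first six characters of the git
# extended header lines: 'new fi'le mode / 'rename' / 'copy f'rom / 'copy t'o
# mark an add ('a'), 'delete'd file mode marks a removal ('r'), 'GIT bi'nary
# patch flags the file as binary, and anything else stays a modification
# ('m').  A hypothetical input such as
#
#   diff --git a/foo.py b/foo.py
#   --- a/foo.py
#   +++ b/foo.py
#   +print('hi')
#
# would yield ('foo.py', 'm', {10: 1}, 0, False): one added line whose
# content length of 11 characters (the line minus its '+') rounds to 10, and
# no removed lines.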

@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
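    # The dumped model is one JSON object; each measured property maps to a
    # frequency table of [value, count] pairs, most frequent first (see
    # pronk() below).  An illustrative fragment, not real output:
    #
    #   {"revs": 120,
    #    "p1distance": [[1, 100], [2, 12], [5, 8]],
    #    "linelengths": [[30, 2400], [35, 1100]], ...}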
    root = repo.root
    if not root.endswith(os.path.sep):
        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        output = os.path.basename(root) + '.json'

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    # Always obtain file counts of each directory in the given root directory.
    def onerror(e):
        ui.warn(_('error walking directory structure: %s\n') % e)

    dirs = {}
    rootprefixlen = len(root)
    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
        dirpathfromroot = dirpath[rootprefixlen:]
        dirs[dirpathfromroot] = len(filenames)
        if '.hg' in dirnames:
            dirnames.remove('.hg')

    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

    # If a mercurial repo is available, also model the commit history.
    if repo:
        revs = scmutil.revrange(repo, revs)
        revs.sort()

        progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
                                   total=len(revs))
        for i, rev in enumerate(revs):
            progress.update(i)
            ctx = repo[rev]
            pl = ctx.parents()
            pctx = pl[0]
            prev = pctx.rev()
            children[prev] += 1
            p1distance[rev - prev] += 1
            parents[len(pl)] += 1
            tzoffset[ctx.date()[1]] += 1
            if len(pl) > 1:
                p2distance[rev - pl[1].rev()] += 1
            if prev == rev - 1:
                lastctx = pctx
            else:
                lastctx = repo[rev - 1]
            if lastctx.rev() != nullrev:
                timedelta = ctx.date()[0] - lastctx.date()[0]
                interarrival[roundto(timedelta, 300)] += 1
            diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                if isbin:
                    continue
                added = sum(lineadd.itervalues(), 0)
                if mar == 'm':
                    if added and lineremove:
                        lineschanged[roundto(added, 5),
                                     roundto(lineremove, 5)] += 1
                        filechanges += 1
                elif mar == 'a':
                    fileadds += 1
                    if '/' in filename:
                        filedir = filename.rsplit('/', 1)[0]
                        if filedir not in pctx.dirs():
                            diradds += 1
                    linesinfilesadded[roundto(added, 5)] += 1
                elif mar == 'r':
                    fileremoves += 1
                for length, count in lineadd.iteritems():
                    linelengths[length] += count
            fileschanged[filechanges] += 1
            filesadded[fileadds] += 1
            dirsadded[diradds] += 1
            filesremoved[fileremoves] += 1
        progress.complete()

    invchildren = zerodict()

    for rev, count in children.iteritems():
        invchildren[count] += 1

    if output != '-':
        ui.status(_('writing output to %s\n') % output)

    def pronk(d):
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    json.dump({'revs': len(revs),
               'initdirs': pronk(dirs),
               'lineschanged': pronk(lineschanged),
               'children': pronk(invchildren),
               'fileschanged': pronk(fileschanged),
               'filesadded': pronk(filesadded),
               'linesinfilesadded': pronk(linesinfilesadded),
               'dirsadded': pronk(dirsadded),
               'filesremoved': pronk(filesremoved),
               'linelengths': pronk(linelengths),
               'parents': pronk(parents),
               'p1distance': pronk(p1distance),
               'p2distance': pronk(p2distance),
               'interarrival': pronk(interarrival),
               'tzoffset': pronk(tzoffset),
               },
              fp)
    fp.close()
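
# A small worked example of the children -> invchildren inversion inside
# analyze(): if children == {0: 2, 1: 1} ("rev 0 has two children, rev 1 has
# one"), then invchildren == {2: 1, 1: 1}: one revision with two children and
# one with a single child, which is the distribution the model records.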

@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number of files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        raise error.Abort('%s: %s' % (descpath, err.strerror))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs
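
    # Worked example: cdf([['a', 3], ['b', 1]]) returns (('a', 'b'),
    # [0.75, 1.0]).  pick() (defined below) draws a uniform random number and
    # maps it through this cumulative table with bisect, so 'a' is chosen
    # about 75% of the time and 'b' about 25%.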

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError as err:
        raise error.Abort('%s: %s' % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)
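
    # makeline() draws a target length from the modeled line-length
    # distribution, then concatenates random dictionary words until that
    # length is reached; a picked total of 20 might produce something like
    # 'aardvark quixotic ox' (illustrative output only).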

    wlock = repo.wlock()
    lock = repo.lock()

    nevertouch = {'.hgsub', '.hgignore', '.hgtags'}

    _synthesizing = _('synthesizing')
    _files = _('initial files')
    _changesets = _('changesets')
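
    # The actual change in this revision is to close the progress topics:
    # the missing progress.complete() calls are added after the analyze()
    # loop and after the main synthesize loop below, so every
    # ui.makeprogress() now follows the usual pattern:
    #
    #     progress = ui.makeprogress(topic, unit=unit, total=total)
    #     for i, item in enumerate(items):
    #         progress.update(i)
    #         ...  # work
    #     progress.complete()  # clears the progress topic when done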

    # Synthesize a single initial revision adding files to the repo according
    # to the modeled directory structure.
    initcount = int(opts['initfiles'])
    if initcount and initdirs:
        pctx = repo[None].parents()[0]
        dirs = set(pctx.dirs())
        files = {}

        def validpath(path):
            # Don't pick filenames which are already directory names.
            if path in dirs:
                return False
            # Don't pick directories which were used as file names.
            while path:
                if path in files:
                    return False
                path = os.path.dirname(path)
            return True

        progress = ui.makeprogress(_synthesizing, unit=_files, total=initcount)
        for i in xrange(0, initcount):
            progress.update(i)

            path = pickpath()
            while not validpath(path):
                path = pickpath()
            data = '%s contents\n' % path
            files[path] = data
            dir = os.path.dirname(path)
            while dir and dir not in dirs:
                dirs.add(dir)
                dir = os.path.dirname(dir)

        def filectxfn(repo, memctx, path):
            return context.memfilectx(repo, memctx, path, files[path])

        progress.complete()
        message = 'synthesized wide repo with %d files' % (len(files),)
        mc = context.memctx(repo, [pctx.node(), nullid], message,
                            files, filectxfn, ui.username(),
                            '%d %d' % dateutil.makedate())
        initnode = mc.commit()
        if ui.debugflag:
            hexfn = hex
        else:
            hexfn = short
        ui.status(_('added commit %s with %d files\n')
                  % (hexfn(initnode), len(files)))
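
    # context.memctx above assembles the commit entirely in memory: it takes
    # the parent nodes, the changed paths, and filectxfn, which is called
    # back once per path to supply that file's content as a memfilectx;
    # commit() then writes the changeset and returns its node id.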

    # Synthesize incremental revisions to the repository, adding repo depth.
    count = int(opts['count'])
    heads = set(map(repo.changelog.rev, repo.heads()))
    progress = ui.makeprogress(_synthesizing, unit=_changesets, total=count)
    for i in xrange(count):
        progress.update(i)

        node = repo.changelog.node
        revs = len(repo)

        def pickhead(heads, distance):
            if heads:
                lheads = sorted(heads)
                rev = revs - min(pick(distance), revs)
                if rev < lheads[-1]:
                    rev = lheads[bisect.bisect_left(lheads, rev)]
                else:
                    rev = lheads[-1]
                return rev, node(rev)
            return nullrev, nullid

        r1 = revs - min(pick(p1distance), revs)
        p1 = node(r1)

        # the number of heads will grow without bound if we use a pure
        # model, so artificially constrain their proliferation
        toomanyheads = len(heads) > random.randint(1, 20)
        if p2distance[0] and (pick(parents) == 2 or toomanyheads):
            r2, p2 = pickhead(heads.difference([r1]), p2distance)
        else:
            r2, p2 = nullrev, nullid

        pl = [p1, p2]
        pctx = repo[r1]
        mf = pctx.manifest()
        mfk = mf.keys()
        changes = {}
        if mfk:
            for __ in xrange(pick(fileschanged)):
                for __ in xrange(10):
                    fctx = pctx.filectx(random.choice(mfk))
                    path = fctx.path()
                    if not (path in nevertouch or fctx.isbinary() or
                            'l' in fctx.flags()):
                        break
                lines = fctx.data().splitlines()
                add, remove = pick(lineschanged)
                for __ in xrange(remove):
                    if not lines:
                        break
                    del lines[random.randrange(0, len(lines))]
                for __ in xrange(add):
                    lines.insert(random.randint(0, len(lines)), makeline())
                path = fctx.path()
                changes[path] = '\n'.join(lines) + '\n'
            for __ in xrange(pick(filesremoved)):
                path = random.choice(mfk)
                for __ in xrange(10):
                    path = random.choice(mfk)
                    if path not in changes:
                        break
        if filesadded:
            dirs = list(pctx.dirs())
            dirs.insert(0, '')
        for __ in xrange(pick(filesadded)):
            pathstr = ''
            while pathstr in dirs:
                path = [random.choice(dirs)]
                if pick(dirsadded):
                    path.append(random.choice(words))
                path.append(random.choice(words))
                pathstr = '/'.join(filter(None, path))
            data = '\n'.join(makeline()
                             for __ in xrange(pick(linesinfilesadded))) + '\n'
            changes[pathstr] = data
        def filectxfn(repo, memctx, path):
            if path not in changes:
                return None
            return context.memfilectx(repo, memctx, path, changes[path])
        if not changes:
            continue
        if revs:
            date = repo['tip'].date()[0] + pick(interarrival)
        else:
            date = time.time() - (86400 * count)
        # dates in mercurial must be positive, fit in 32-bit signed integers.
        date = min(0x7fffffff, max(0, date))
        user = random.choice(words) + '@' + random.choice(words)
        mc = context.memctx(repo, pl, makeline(minimum=2),
                            sorted(changes),
                            filectxfn, user, '%d %d' % (date, pick(tzoffset)))
        newnode = mc.commit()
        heads.add(repo.changelog.rev(newnode))
        heads.discard(r1)
        heads.discard(r2)
    progress.complete()

    lock.release()
    wlock.release()

def renamedirs(dirs, words):
    '''Randomly rename the directory names in the per-dir file count dict.'''
    wordgen = itertools.cycle(words)
    replacements = {'': ''}
    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        head, _ = os.path.split(dirpath)
        if head:
            head = rename(head)
        else:
            head = ''
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed
    result = []
    for dirpath, count in dirs.iteritems():
        result.append([rename(dirpath.lstrip(os.sep)), count])
    return result
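
# Worked example for renamedirs(): with words cycling 'apple', 'pear', the
# input {'src': 3, 'src/util': 2} maps 'src' -> 'apple', and 'src/util'
# reuses the memoized prefix to become 'apple/pear'.  Because every prefix is
# resolved through the replacements cache, shared prefixes always rename
# consistently no matter which directory is visited first.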