synthrepo: use pycompat.xrange...
Gregory Szorc
r43274:c07812bd default
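
Python 3 has no `xrange` builtin, so the bare `xrange` calls in contrib/synthrepo.py fail under Python 3. This change routes every call site through `pycompat.xrange`, Mercurial's cross-version alias, and adds the missing `pycompat` import. Conceptually the alias amounts to the following sketch (illustrative only, not the real `mercurial/pycompat.py`):

```python
# Illustrative sketch of a py2/py3 xrange alias; Mercurial's real
# pycompat module is more involved and covers many more names.
import sys

if sys.version_info[0] >= 3:
    xrange = range  # Python 3: range is already a lazy sequence
# on Python 2, the builtin xrange is used as-is

for i in xrange(3):  # iterates lazily on either interpreter
    print(i)
```

The full-file diff follows.
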
@@ -1,520 +1,522 @@
 # synthrepo.py - repo synthesis
 #
 # Copyright 2012 Facebook
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 '''synthesize structurally interesting change history

 This extension is useful for creating a repository with properties
 that are statistically similar to an existing repository. During
 analysis, a simple probability table is constructed from the history
 of an existing repository. During synthesis, these properties are
 reconstructed.

 Properties that are analyzed and synthesized include the following:

 - Lines added or removed when an existing file is modified
 - Number and sizes of files added
 - Number of files removed
 - Line lengths
 - Topological distance to parent changeset(s)
 - Probability of a commit being a merge
 - Probability of a newly added file being added to a new directory
 - Interarrival time, and time zone, of commits
 - Number of files in each directory

 A few obvious properties that are not currently handled realistically:

 - Merges are treated as regular commits with two parents, which is not
   realistic
 - Modifications are not treated as operations on hunks of lines, but
   as insertions and deletions of randomly chosen single lines
 - Committer ID (always random)
 - Executability of files
 - Symlinks and binary files are ignored
 '''

 from __future__ import absolute_import
 import bisect
 import collections
 import itertools
 import json
 import os
 import random
 import sys
 import time

 from mercurial.i18n import _
 from mercurial.node import (
     nullid,
     nullrev,
     short,
 )
 from mercurial import (
     context,
     diffutil,
     error,
     hg,
     patch,
+    pycompat,
     registrar,
     scmutil,
 )
 from mercurial.utils import (
     dateutil,
 )

 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
 # be specifying the version(s) of Mercurial they are tested with, or
 # leave the attribute unspecified.
 testedwith = 'ships-with-hg-core'

 cmdtable = {}
 command = registrar.command(cmdtable)

 newfile = {'new fi', 'rename', 'copy f', 'copy t'}

 def zerodict():
     return collections.defaultdict(lambda: 0)

 def roundto(x, k):
     if x > k * 2:
         return int(round(x / float(k)) * k)
     return int(round(x))

 def parsegitdiff(lines):
     filename, mar, lineadd, lineremove = None, None, zerodict(), 0
     binary = False
     for line in lines:
         start = line[:6]
         if start == 'diff -':
             if filename:
                 yield filename, mar, lineadd, lineremove, binary
             mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
             filename = patch.gitre.match(line).group(1)
         elif start in newfile:
             mar = 'a'
         elif start == 'GIT bi':
             binary = True
         elif start == 'delete':
             mar = 'r'
         elif start:
             s = start[0]
             if s == '-' and not line.startswith('--- '):
                 lineremove += 1
             elif s == '+' and not line.startswith('+++ '):
                 lineadd[roundto(len(line) - 1, 5)] += 1
     if filename:
         yield filename, mar, lineadd, lineremove, binary

 @command('analyze',
          [('o', 'output', '', _('write output to given file'), _('FILE')),
           ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
          _('hg analyze'), optionalrepo=True)
 def analyze(ui, repo, *revs, **opts):
     '''create a simple model of a repository to use for later synthesis

     This command examines every changeset in the given range (or all
     of history if none are specified) and creates a simple statistical
     model of the history of the repository. It also measures the directory
     structure of the repository as checked out.

     The model is written out to a JSON file, and can be used by
     :hg:`synthesize` to create or augment a repository with synthetic
     commits that have a structure that is statistically similar to the
     analyzed repository.
     '''
     root = repo.root
     if not root.endswith(os.path.sep):
         root += os.path.sep

     revs = list(revs)
     revs.extend(opts['rev'])
     if not revs:
         revs = [':']

     output = opts['output']
     if not output:
         output = os.path.basename(root) + '.json'

     if output == '-':
         fp = sys.stdout
     else:
         fp = open(output, 'w')

     # Always obtain file counts of each directory in the given root directory.
     def onerror(e):
         ui.warn(_('error walking directory structure: %s\n') % e)

     dirs = {}
     rootprefixlen = len(root)
     for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
         dirpathfromroot = dirpath[rootprefixlen:]
         dirs[dirpathfromroot] = len(filenames)
         if '.hg' in dirnames:
             dirnames.remove('.hg')

     lineschanged = zerodict()
     children = zerodict()
     p1distance = zerodict()
     p2distance = zerodict()
     linesinfilesadded = zerodict()
     fileschanged = zerodict()
     filesadded = zerodict()
     filesremoved = zerodict()
     linelengths = zerodict()
     interarrival = zerodict()
     parents = zerodict()
     dirsadded = zerodict()
     tzoffset = zerodict()

     # If a mercurial repo is available, also model the commit history.
     if repo:
         revs = scmutil.revrange(repo, revs)
         revs.sort()

         progress = ui.makeprogress(_('analyzing'), unit=_('changesets'),
                                    total=len(revs))
         for i, rev in enumerate(revs):
             progress.update(i)
             ctx = repo[rev]
             pl = ctx.parents()
             pctx = pl[0]
             prev = pctx.rev()
             children[prev] += 1
             p1distance[rev - prev] += 1
             parents[len(pl)] += 1
             tzoffset[ctx.date()[1]] += 1
             if len(pl) > 1:
                 p2distance[rev - pl[1].rev()] += 1
             if prev == rev - 1:
                 lastctx = pctx
             else:
                 lastctx = repo[rev - 1]
             if lastctx.rev() != nullrev:
                 timedelta = ctx.date()[0] - lastctx.date()[0]
                 interarrival[roundto(timedelta, 300)] += 1
             diffopts = diffutil.diffallopts(ui, {'git': True})
             diff = sum((d.splitlines()
                         for d in ctx.diff(pctx, opts=diffopts)), [])
             fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
             for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                 if isbin:
                     continue
                 added = sum(lineadd.itervalues(), 0)
                 if mar == 'm':
                     if added and lineremove:
                         lineschanged[roundto(added, 5),
                                      roundto(lineremove, 5)] += 1
                         filechanges += 1
                 elif mar == 'a':
                     fileadds += 1
                     if '/' in filename:
                         filedir = filename.rsplit('/', 1)[0]
                         if filedir not in pctx.dirs():
                             diradds += 1
                     linesinfilesadded[roundto(added, 5)] += 1
                 elif mar == 'r':
                     fileremoves += 1
                 for length, count in lineadd.iteritems():
                     linelengths[length] += count
             fileschanged[filechanges] += 1
             filesadded[fileadds] += 1
             dirsadded[diradds] += 1
             filesremoved[fileremoves] += 1
         progress.complete()

     invchildren = zerodict()

     for rev, count in children.iteritems():
         invchildren[count] += 1

     if output != '-':
         ui.status(_('writing output to %s\n') % output)

     def pronk(d):
         return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

     json.dump({'revs': len(revs),
                'initdirs': pronk(dirs),
                'lineschanged': pronk(lineschanged),
                'children': pronk(invchildren),
                'fileschanged': pronk(fileschanged),
                'filesadded': pronk(filesadded),
                'linesinfilesadded': pronk(linesinfilesadded),
                'dirsadded': pronk(dirsadded),
                'filesremoved': pronk(filesremoved),
                'linelengths': pronk(linelengths),
                'parents': pronk(parents),
                'p1distance': pronk(p1distance),
                'p2distance': pronk(p2distance),
                'interarrival': pronk(interarrival),
                'tzoffset': pronk(tzoffset),
                },
               fp)
     fp.close()

 @command('synthesize',
          [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
           ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
           ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
          _('hg synthesize [OPTION].. DESCFILE'))
 def synthesize(ui, repo, descpath, **opts):
     '''synthesize commits based on a model of an existing repository

     The model must have been generated by :hg:`analyze`. Commits will
     be generated randomly according to the probabilities described in
     the model. If --initfiles is set, the repository will be seeded with
     the given number files following the modeled repository's directory
     structure.

     When synthesizing new content, commit descriptions, and user
     names, words will be chosen randomly from a dictionary that is
     presumed to contain one word per line. Use --dict to specify the
     path to an alternate dictionary to use.
     '''
     try:
         fp = hg.openpath(ui, descpath)
     except Exception as err:
         raise error.Abort('%s: %s' % (descpath, err[0].strerror))
     desc = json.load(fp)
     fp.close()

     def cdf(l):
         if not l:
             return [], []
         vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
         t = float(sum(probs, 0))
         s, cdfs = 0, []
         for v in probs:
             s += v
             cdfs.append(s / t)
         return vals, cdfs

     lineschanged = cdf(desc['lineschanged'])
     fileschanged = cdf(desc['fileschanged'])
     filesadded = cdf(desc['filesadded'])
     dirsadded = cdf(desc['dirsadded'])
     filesremoved = cdf(desc['filesremoved'])
     linelengths = cdf(desc['linelengths'])
     parents = cdf(desc['parents'])
     p1distance = cdf(desc['p1distance'])
     p2distance = cdf(desc['p2distance'])
     interarrival = cdf(desc['interarrival'])
     linesinfilesadded = cdf(desc['linesinfilesadded'])
     tzoffset = cdf(desc['tzoffset'])

     dictfile = opts.get('dict') or '/usr/share/dict/words'
     try:
         fp = open(dictfile, 'rU')
     except IOError as err:
         raise error.Abort('%s: %s' % (dictfile, err.strerror))
     words = fp.read().splitlines()
     fp.close()

     initdirs = {}
     if desc['initdirs']:
         for k, v in desc['initdirs']:
             initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
         initdirs = renamedirs(initdirs, words)
     initdirscdf = cdf(initdirs)

     def pick(cdf):
         return cdf[0][bisect.bisect_left(cdf[1], random.random())]

     def pickpath():
         return os.path.join(pick(initdirscdf), random.choice(words))

     def makeline(minimum=0):
         total = max(minimum, pick(linelengths))
         c, l = 0, []
         while c < total:
             w = random.choice(words)
             c += len(w) + 1
             l.append(w)
         return ' '.join(l)

     wlock = repo.wlock()
     lock = repo.lock()

     nevertouch = {'.hgsub', '.hgignore', '.hgtags'}

     _synthesizing = _('synthesizing')
     _files = _('initial files')
     _changesets = _('changesets')

     # Synthesize a single initial revision adding files to the repo according
     # to the modeled directory structure.
     initcount = int(opts['initfiles'])
     if initcount and initdirs:
         pctx = repo['.']
         dirs = set(pctx.dirs())
         files = {}

         def validpath(path):
             # Don't pick filenames which are already directory names.
             if path in dirs:
                 return False
             # Don't pick directories which were used as file names.
             while path:
                 if path in files:
                     return False
                 path = os.path.dirname(path)
             return True

         progress = ui.makeprogress(_synthesizing, unit=_files, total=initcount)
-        for i in xrange(0, initcount):
+        for i in pycompat.xrange(0, initcount):
             progress.update(i)

             path = pickpath()
             while not validpath(path):
                 path = pickpath()
             data = '%s contents\n' % path
             files[path] = data
             dir = os.path.dirname(path)
             while dir and dir not in dirs:
                 dirs.add(dir)
                 dir = os.path.dirname(dir)

         def filectxfn(repo, memctx, path):
             return context.memfilectx(repo, memctx, path, files[path])

         progress.complete()
         message = 'synthesized wide repo with %d files' % (len(files),)
         mc = context.memctx(repo, [pctx.node(), nullid], message,
                             files, filectxfn, ui.username(),
                             '%d %d' % dateutil.makedate())
         initnode = mc.commit()
         if ui.debugflag:
             hexfn = hex
         else:
             hexfn = short
         ui.status(_('added commit %s with %d files\n')
                   % (hexfn(initnode), len(files)))

     # Synthesize incremental revisions to the repository, adding repo depth.
     count = int(opts['count'])
     heads = set(map(repo.changelog.rev, repo.heads()))
     progress = ui.makeprogress(_synthesizing, unit=_changesets, total=count)
-    for i in xrange(count):
+    for i in pycompat.xrange(count):
         progress.update(i)

         node = repo.changelog.node
         revs = len(repo)

         def pickhead(heads, distance):
             if heads:
                 lheads = sorted(heads)
                 rev = revs - min(pick(distance), revs)
                 if rev < lheads[-1]:
                     rev = lheads[bisect.bisect_left(lheads, rev)]
                 else:
                     rev = lheads[-1]
                 return rev, node(rev)
             return nullrev, nullid

         r1 = revs - min(pick(p1distance), revs)
         p1 = node(r1)

         # the number of heads will grow without bound if we use a pure
         # model, so artificially constrain their proliferation
         toomanyheads = len(heads) > random.randint(1, 20)
         if p2distance[0] and (pick(parents) == 2 or toomanyheads):
             r2, p2 = pickhead(heads.difference([r1]), p2distance)
         else:
             r2, p2 = nullrev, nullid

         pl = [p1, p2]
         pctx = repo[r1]
         mf = pctx.manifest()
         mfk = mf.keys()
         changes = {}
         if mfk:
-            for __ in xrange(pick(fileschanged)):
-                for __ in xrange(10):
+            for __ in pycompat.xrange(pick(fileschanged)):
+                for __ in pycompat.xrange(10):
                     fctx = pctx.filectx(random.choice(mfk))
                     path = fctx.path()
                     if not (path in nevertouch or fctx.isbinary() or
                             'l' in fctx.flags()):
                         break
                 lines = fctx.data().splitlines()
                 add, remove = pick(lineschanged)
-                for __ in xrange(remove):
+                for __ in pycompat.xrange(remove):
                     if not lines:
                         break
                     del lines[random.randrange(0, len(lines))]
-                for __ in xrange(add):
+                for __ in pycompat.xrange(add):
                     lines.insert(random.randint(0, len(lines)), makeline())
                 path = fctx.path()
                 changes[path] = '\n'.join(lines) + '\n'
-            for __ in xrange(pick(filesremoved)):
-                for __ in xrange(10):
+            for __ in pycompat.xrange(pick(filesremoved)):
+                for __ in pycompat.xrange(10):
                     path = random.choice(mfk)
                     if path not in changes:
                         break
         if filesadded:
             dirs = list(pctx.dirs())
             dirs.insert(0, '')
-        for __ in xrange(pick(filesadded)):
+        for __ in pycompat.xrange(pick(filesadded)):
             pathstr = ''
             while pathstr in dirs:
                 path = [random.choice(dirs)]
                 if pick(dirsadded):
                     path.append(random.choice(words))
                 path.append(random.choice(words))
                 pathstr = '/'.join(filter(None, path))
-            data = '\n'.join(makeline()
-                             for __ in xrange(pick(linesinfilesadded))) + '\n'
+            data = '\n'.join(
+                makeline()
+                for __ in pycompat.xrange(pick(linesinfilesadded))) + '\n'
             changes[pathstr] = data
         def filectxfn(repo, memctx, path):
             if path not in changes:
                 return None
             return context.memfilectx(repo, memctx, path, changes[path])
         if not changes:
             continue
         if revs:
             date = repo['tip'].date()[0] + pick(interarrival)
         else:
             date = time.time() - (86400 * count)
         # dates in mercurial must be positive, fit in 32-bit signed integers.
         date = min(0x7fffffff, max(0, date))
         user = random.choice(words) + '@' + random.choice(words)
         mc = context.memctx(repo, pl, makeline(minimum=2),
                             sorted(changes),
                             filectxfn, user, '%d %d' % (date, pick(tzoffset)))
         newnode = mc.commit()
         heads.add(repo.changelog.rev(newnode))
         heads.discard(r1)
         heads.discard(r2)
     progress.complete()

     lock.release()
     wlock.release()

 def renamedirs(dirs, words):
     '''Randomly rename the directory names in the per-dir file count dict.'''
     wordgen = itertools.cycle(words)
     replacements = {'': ''}
     def rename(dirpath):
         '''Recursively rename the directory and all path prefixes.

         The mapping from path to renamed path is stored for all path prefixes
         as in dynamic programming, ensuring linear runtime and consistent
         renaming regardless of iteration order through the model.
         '''
         if dirpath in replacements:
             return replacements[dirpath]
         head, _ = os.path.split(dirpath)
         if head:
             head = rename(head)
         else:
             head = ''
         renamed = os.path.join(head, next(wordgen))
         replacements[dirpath] = renamed
         return renamed
     result = []
     for dirpath, count in dirs.iteritems():
         result.append([rename(dirpath.lstrip(os.sep)), count])
     return result
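
The synthesis side leans entirely on the `cdf`/`pick` pair visible above: `analyze` serializes each property as `(value, frequency)` pairs, `cdf` normalizes the frequencies into a cumulative distribution, and `pick` draws from it by binary-searching a uniform random number (inverse-CDF sampling). A self-contained sketch of the same technique (standalone names for illustration, not part of the extension):

```python
import bisect
import random

def cdf(pairs):
    # Sort (value, frequency) pairs by descending frequency, then build
    # the running cumulative probabilities alongside the values.
    if not pairs:
        return [], []
    vals, freqs = zip(*sorted(pairs, key=lambda x: x[1], reverse=True))
    total = float(sum(freqs))
    running, cdfs = 0, []
    for f in freqs:
        running += f
        cdfs.append(running / total)
    return vals, cdfs

def pick(model):
    # Inverse-CDF sampling: a uniform draw in [0, 1) lands in the
    # cumulative bucket whose width equals that value's probability.
    vals, cdfs = model
    return vals[bisect.bisect_left(cdfs, random.random())]

# e.g. line lengths seen 70/20/10 times in the analyzed history:
# pick() returns 40 about 70% of the time, 10 about 20%, 120 about 10%.
model = cdf([(40, 70), (10, 20), (120, 10)])
samples = [pick(model) for _ in range(1000)]
print(sum(1 for s in samples if s == 40) / 1000.0)
```

This single primitive drives everything random in `synthesize` (parent distances, file counts, line lengths, time-zone offsets), which is why the py3 breakage of the surrounding `xrange` loops mattered; in practice the extension is driven by `hg analyze` followed by `hg synthesize`, as the docstrings above describe.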