##// END OF EJS Templates
synthrepo: create filectx instance in 'filectxfn' callback...
Martin von Zweigbergk -
r35399:2123e762 default
parent child Browse files
Show More
@@ -1,516 +1,516 b''
1 # synthrepo.py - repo synthesis
1 # synthrepo.py - repo synthesis
2 #
2 #
3 # Copyright 2012 Facebook
3 # Copyright 2012 Facebook
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 '''synthesize structurally interesting change history
8 '''synthesize structurally interesting change history
9
9
10 This extension is useful for creating a repository with properties
10 This extension is useful for creating a repository with properties
11 that are statistically similar to an existing repository. During
11 that are statistically similar to an existing repository. During
12 analysis, a simple probability table is constructed from the history
12 analysis, a simple probability table is constructed from the history
13 of an existing repository. During synthesis, these properties are
13 of an existing repository. During synthesis, these properties are
14 reconstructed.
14 reconstructed.
15
15
16 Properties that are analyzed and synthesized include the following:
16 Properties that are analyzed and synthesized include the following:
17
17
18 - Lines added or removed when an existing file is modified
18 - Lines added or removed when an existing file is modified
19 - Number and sizes of files added
19 - Number and sizes of files added
20 - Number of files removed
20 - Number of files removed
21 - Line lengths
21 - Line lengths
22 - Topological distance to parent changeset(s)
22 - Topological distance to parent changeset(s)
23 - Probability of a commit being a merge
23 - Probability of a commit being a merge
24 - Probability of a newly added file being added to a new directory
24 - Probability of a newly added file being added to a new directory
25 - Interarrival time, and time zone, of commits
25 - Interarrival time, and time zone, of commits
26 - Number of files in each directory
26 - Number of files in each directory
27
27
28 A few obvious properties that are not currently handled realistically:
28 A few obvious properties that are not currently handled realistically:
29
29
30 - Merges are treated as regular commits with two parents, which is not
30 - Merges are treated as regular commits with two parents, which is not
31 realistic
31 realistic
32 - Modifications are not treated as operations on hunks of lines, but
32 - Modifications are not treated as operations on hunks of lines, but
33 as insertions and deletions of randomly chosen single lines
33 as insertions and deletions of randomly chosen single lines
34 - Committer ID (always random)
34 - Committer ID (always random)
35 - Executability of files
35 - Executability of files
36 - Symlinks and binary files are ignored
36 - Symlinks and binary files are ignored
37 '''
37 '''
38
38
39 from __future__ import absolute_import
39 from __future__ import absolute_import
40 import bisect
40 import bisect
41 import collections
41 import collections
42 import itertools
42 import itertools
43 import json
43 import json
44 import os
44 import os
45 import random
45 import random
46 import sys
46 import sys
47 import time
47 import time
48
48
49 from mercurial.i18n import _
49 from mercurial.i18n import _
50 from mercurial.node import (
50 from mercurial.node import (
51 nullid,
51 nullid,
52 nullrev,
52 nullrev,
53 short,
53 short,
54 )
54 )
55 from mercurial import (
55 from mercurial import (
56 context,
56 context,
57 error,
57 error,
58 hg,
58 hg,
59 patch,
59 patch,
60 registrar,
60 registrar,
61 scmutil,
61 scmutil,
62 util,
62 util,
63 )
63 )
64
64
65 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
65 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
66 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
66 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
67 # be specifying the version(s) of Mercurial they are tested with, or
67 # be specifying the version(s) of Mercurial they are tested with, or
68 # leave the attribute unspecified.
68 # leave the attribute unspecified.
69 testedwith = 'ships-with-hg-core'
69 testedwith = 'ships-with-hg-core'
70
70
71 cmdtable = {}
71 cmdtable = {}
72 command = registrar.command(cmdtable)
72 command = registrar.command(cmdtable)
73
73
74 newfile = {'new fi', 'rename', 'copy f', 'copy t'}
74 newfile = {'new fi', 'rename', 'copy f', 'copy t'}
75
75
76 def zerodict():
76 def zerodict():
77 return collections.defaultdict(lambda: 0)
77 return collections.defaultdict(lambda: 0)
78
78
79 def roundto(x, k):
79 def roundto(x, k):
80 if x > k * 2:
80 if x > k * 2:
81 return int(round(x / float(k)) * k)
81 return int(round(x / float(k)) * k)
82 return int(round(x))
82 return int(round(x))
83
83
84 def parsegitdiff(lines):
84 def parsegitdiff(lines):
85 filename, mar, lineadd, lineremove = None, None, zerodict(), 0
85 filename, mar, lineadd, lineremove = None, None, zerodict(), 0
86 binary = False
86 binary = False
87 for line in lines:
87 for line in lines:
88 start = line[:6]
88 start = line[:6]
89 if start == 'diff -':
89 if start == 'diff -':
90 if filename:
90 if filename:
91 yield filename, mar, lineadd, lineremove, binary
91 yield filename, mar, lineadd, lineremove, binary
92 mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
92 mar, lineadd, lineremove, binary = 'm', zerodict(), 0, False
93 filename = patch.gitre.match(line).group(1)
93 filename = patch.gitre.match(line).group(1)
94 elif start in newfile:
94 elif start in newfile:
95 mar = 'a'
95 mar = 'a'
96 elif start == 'GIT bi':
96 elif start == 'GIT bi':
97 binary = True
97 binary = True
98 elif start == 'delete':
98 elif start == 'delete':
99 mar = 'r'
99 mar = 'r'
100 elif start:
100 elif start:
101 s = start[0]
101 s = start[0]
102 if s == '-' and not line.startswith('--- '):
102 if s == '-' and not line.startswith('--- '):
103 lineremove += 1
103 lineremove += 1
104 elif s == '+' and not line.startswith('+++ '):
104 elif s == '+' and not line.startswith('+++ '):
105 lineadd[roundto(len(line) - 1, 5)] += 1
105 lineadd[roundto(len(line) - 1, 5)] += 1
106 if filename:
106 if filename:
107 yield filename, mar, lineadd, lineremove, binary
107 yield filename, mar, lineadd, lineremove, binary
108
108
109 @command('analyze',
109 @command('analyze',
110 [('o', 'output', '', _('write output to given file'), _('FILE')),
110 [('o', 'output', '', _('write output to given file'), _('FILE')),
111 ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
111 ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
112 _('hg analyze'), optionalrepo=True)
112 _('hg analyze'), optionalrepo=True)
113 def analyze(ui, repo, *revs, **opts):
113 def analyze(ui, repo, *revs, **opts):
114 '''create a simple model of a repository to use for later synthesis
114 '''create a simple model of a repository to use for later synthesis
115
115
116 This command examines every changeset in the given range (or all
116 This command examines every changeset in the given range (or all
117 of history if none are specified) and creates a simple statistical
117 of history if none are specified) and creates a simple statistical
118 model of the history of the repository. It also measures the directory
118 model of the history of the repository. It also measures the directory
119 structure of the repository as checked out.
119 structure of the repository as checked out.
120
120
121 The model is written out to a JSON file, and can be used by
121 The model is written out to a JSON file, and can be used by
122 :hg:`synthesize` to create or augment a repository with synthetic
122 :hg:`synthesize` to create or augment a repository with synthetic
123 commits that have a structure that is statistically similar to the
123 commits that have a structure that is statistically similar to the
124 analyzed repository.
124 analyzed repository.
125 '''
125 '''
126 root = repo.root
126 root = repo.root
127 if not root.endswith(os.path.sep):
127 if not root.endswith(os.path.sep):
128 root += os.path.sep
128 root += os.path.sep
129
129
130 revs = list(revs)
130 revs = list(revs)
131 revs.extend(opts['rev'])
131 revs.extend(opts['rev'])
132 if not revs:
132 if not revs:
133 revs = [':']
133 revs = [':']
134
134
135 output = opts['output']
135 output = opts['output']
136 if not output:
136 if not output:
137 output = os.path.basename(root) + '.json'
137 output = os.path.basename(root) + '.json'
138
138
139 if output == '-':
139 if output == '-':
140 fp = sys.stdout
140 fp = sys.stdout
141 else:
141 else:
142 fp = open(output, 'w')
142 fp = open(output, 'w')
143
143
144 # Always obtain file counts of each directory in the given root directory.
144 # Always obtain file counts of each directory in the given root directory.
145 def onerror(e):
145 def onerror(e):
146 ui.warn(_('error walking directory structure: %s\n') % e)
146 ui.warn(_('error walking directory structure: %s\n') % e)
147
147
148 dirs = {}
148 dirs = {}
149 rootprefixlen = len(root)
149 rootprefixlen = len(root)
150 for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
150 for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
151 dirpathfromroot = dirpath[rootprefixlen:]
151 dirpathfromroot = dirpath[rootprefixlen:]
152 dirs[dirpathfromroot] = len(filenames)
152 dirs[dirpathfromroot] = len(filenames)
153 if '.hg' in dirnames:
153 if '.hg' in dirnames:
154 dirnames.remove('.hg')
154 dirnames.remove('.hg')
155
155
156 lineschanged = zerodict()
156 lineschanged = zerodict()
157 children = zerodict()
157 children = zerodict()
158 p1distance = zerodict()
158 p1distance = zerodict()
159 p2distance = zerodict()
159 p2distance = zerodict()
160 linesinfilesadded = zerodict()
160 linesinfilesadded = zerodict()
161 fileschanged = zerodict()
161 fileschanged = zerodict()
162 filesadded = zerodict()
162 filesadded = zerodict()
163 filesremoved = zerodict()
163 filesremoved = zerodict()
164 linelengths = zerodict()
164 linelengths = zerodict()
165 interarrival = zerodict()
165 interarrival = zerodict()
166 parents = zerodict()
166 parents = zerodict()
167 dirsadded = zerodict()
167 dirsadded = zerodict()
168 tzoffset = zerodict()
168 tzoffset = zerodict()
169
169
170 # If a mercurial repo is available, also model the commit history.
170 # If a mercurial repo is available, also model the commit history.
171 if repo:
171 if repo:
172 revs = scmutil.revrange(repo, revs)
172 revs = scmutil.revrange(repo, revs)
173 revs.sort()
173 revs.sort()
174
174
175 progress = ui.progress
175 progress = ui.progress
176 _analyzing = _('analyzing')
176 _analyzing = _('analyzing')
177 _changesets = _('changesets')
177 _changesets = _('changesets')
178 _total = len(revs)
178 _total = len(revs)
179
179
180 for i, rev in enumerate(revs):
180 for i, rev in enumerate(revs):
181 progress(_analyzing, i, unit=_changesets, total=_total)
181 progress(_analyzing, i, unit=_changesets, total=_total)
182 ctx = repo[rev]
182 ctx = repo[rev]
183 pl = ctx.parents()
183 pl = ctx.parents()
184 pctx = pl[0]
184 pctx = pl[0]
185 prev = pctx.rev()
185 prev = pctx.rev()
186 children[prev] += 1
186 children[prev] += 1
187 p1distance[rev - prev] += 1
187 p1distance[rev - prev] += 1
188 parents[len(pl)] += 1
188 parents[len(pl)] += 1
189 tzoffset[ctx.date()[1]] += 1
189 tzoffset[ctx.date()[1]] += 1
190 if len(pl) > 1:
190 if len(pl) > 1:
191 p2distance[rev - pl[1].rev()] += 1
191 p2distance[rev - pl[1].rev()] += 1
192 if prev == rev - 1:
192 if prev == rev - 1:
193 lastctx = pctx
193 lastctx = pctx
194 else:
194 else:
195 lastctx = repo[rev - 1]
195 lastctx = repo[rev - 1]
196 if lastctx.rev() != nullrev:
196 if lastctx.rev() != nullrev:
197 timedelta = ctx.date()[0] - lastctx.date()[0]
197 timedelta = ctx.date()[0] - lastctx.date()[0]
198 interarrival[roundto(timedelta, 300)] += 1
198 interarrival[roundto(timedelta, 300)] += 1
199 diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
199 diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
200 fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
200 fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
201 for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
201 for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
202 if isbin:
202 if isbin:
203 continue
203 continue
204 added = sum(lineadd.itervalues(), 0)
204 added = sum(lineadd.itervalues(), 0)
205 if mar == 'm':
205 if mar == 'm':
206 if added and lineremove:
206 if added and lineremove:
207 lineschanged[roundto(added, 5),
207 lineschanged[roundto(added, 5),
208 roundto(lineremove, 5)] += 1
208 roundto(lineremove, 5)] += 1
209 filechanges += 1
209 filechanges += 1
210 elif mar == 'a':
210 elif mar == 'a':
211 fileadds += 1
211 fileadds += 1
212 if '/' in filename:
212 if '/' in filename:
213 filedir = filename.rsplit('/', 1)[0]
213 filedir = filename.rsplit('/', 1)[0]
214 if filedir not in pctx.dirs():
214 if filedir not in pctx.dirs():
215 diradds += 1
215 diradds += 1
216 linesinfilesadded[roundto(added, 5)] += 1
216 linesinfilesadded[roundto(added, 5)] += 1
217 elif mar == 'r':
217 elif mar == 'r':
218 fileremoves += 1
218 fileremoves += 1
219 for length, count in lineadd.iteritems():
219 for length, count in lineadd.iteritems():
220 linelengths[length] += count
220 linelengths[length] += count
221 fileschanged[filechanges] += 1
221 fileschanged[filechanges] += 1
222 filesadded[fileadds] += 1
222 filesadded[fileadds] += 1
223 dirsadded[diradds] += 1
223 dirsadded[diradds] += 1
224 filesremoved[fileremoves] += 1
224 filesremoved[fileremoves] += 1
225
225
226 invchildren = zerodict()
226 invchildren = zerodict()
227
227
228 for rev, count in children.iteritems():
228 for rev, count in children.iteritems():
229 invchildren[count] += 1
229 invchildren[count] += 1
230
230
231 if output != '-':
231 if output != '-':
232 ui.status(_('writing output to %s\n') % output)
232 ui.status(_('writing output to %s\n') % output)
233
233
234 def pronk(d):
234 def pronk(d):
235 return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
235 return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)
236
236
237 json.dump({'revs': len(revs),
237 json.dump({'revs': len(revs),
238 'initdirs': pronk(dirs),
238 'initdirs': pronk(dirs),
239 'lineschanged': pronk(lineschanged),
239 'lineschanged': pronk(lineschanged),
240 'children': pronk(invchildren),
240 'children': pronk(invchildren),
241 'fileschanged': pronk(fileschanged),
241 'fileschanged': pronk(fileschanged),
242 'filesadded': pronk(filesadded),
242 'filesadded': pronk(filesadded),
243 'linesinfilesadded': pronk(linesinfilesadded),
243 'linesinfilesadded': pronk(linesinfilesadded),
244 'dirsadded': pronk(dirsadded),
244 'dirsadded': pronk(dirsadded),
245 'filesremoved': pronk(filesremoved),
245 'filesremoved': pronk(filesremoved),
246 'linelengths': pronk(linelengths),
246 'linelengths': pronk(linelengths),
247 'parents': pronk(parents),
247 'parents': pronk(parents),
248 'p1distance': pronk(p1distance),
248 'p1distance': pronk(p1distance),
249 'p2distance': pronk(p2distance),
249 'p2distance': pronk(p2distance),
250 'interarrival': pronk(interarrival),
250 'interarrival': pronk(interarrival),
251 'tzoffset': pronk(tzoffset),
251 'tzoffset': pronk(tzoffset),
252 },
252 },
253 fp)
253 fp)
254 fp.close()
254 fp.close()
255
255
256 @command('synthesize',
256 @command('synthesize',
257 [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
257 [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
258 ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
258 ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
259 ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
259 ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
260 _('hg synthesize [OPTION].. DESCFILE'))
260 _('hg synthesize [OPTION].. DESCFILE'))
261 def synthesize(ui, repo, descpath, **opts):
261 def synthesize(ui, repo, descpath, **opts):
262 '''synthesize commits based on a model of an existing repository
262 '''synthesize commits based on a model of an existing repository
263
263
264 The model must have been generated by :hg:`analyze`. Commits will
264 The model must have been generated by :hg:`analyze`. Commits will
265 be generated randomly according to the probabilities described in
265 be generated randomly according to the probabilities described in
266 the model. If --initfiles is set, the repository will be seeded with
266 the model. If --initfiles is set, the repository will be seeded with
267 the given number files following the modeled repository's directory
267 the given number files following the modeled repository's directory
268 structure.
268 structure.
269
269
270 When synthesizing new content, commit descriptions, and user
270 When synthesizing new content, commit descriptions, and user
271 names, words will be chosen randomly from a dictionary that is
271 names, words will be chosen randomly from a dictionary that is
272 presumed to contain one word per line. Use --dict to specify the
272 presumed to contain one word per line. Use --dict to specify the
273 path to an alternate dictionary to use.
273 path to an alternate dictionary to use.
274 '''
274 '''
275 try:
275 try:
276 fp = hg.openpath(ui, descpath)
276 fp = hg.openpath(ui, descpath)
277 except Exception as err:
277 except Exception as err:
278 raise error.Abort('%s: %s' % (descpath, err[0].strerror))
278 raise error.Abort('%s: %s' % (descpath, err[0].strerror))
279 desc = json.load(fp)
279 desc = json.load(fp)
280 fp.close()
280 fp.close()
281
281
282 def cdf(l):
282 def cdf(l):
283 if not l:
283 if not l:
284 return [], []
284 return [], []
285 vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
285 vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
286 t = float(sum(probs, 0))
286 t = float(sum(probs, 0))
287 s, cdfs = 0, []
287 s, cdfs = 0, []
288 for v in probs:
288 for v in probs:
289 s += v
289 s += v
290 cdfs.append(s / t)
290 cdfs.append(s / t)
291 return vals, cdfs
291 return vals, cdfs
292
292
293 lineschanged = cdf(desc['lineschanged'])
293 lineschanged = cdf(desc['lineschanged'])
294 fileschanged = cdf(desc['fileschanged'])
294 fileschanged = cdf(desc['fileschanged'])
295 filesadded = cdf(desc['filesadded'])
295 filesadded = cdf(desc['filesadded'])
296 dirsadded = cdf(desc['dirsadded'])
296 dirsadded = cdf(desc['dirsadded'])
297 filesremoved = cdf(desc['filesremoved'])
297 filesremoved = cdf(desc['filesremoved'])
298 linelengths = cdf(desc['linelengths'])
298 linelengths = cdf(desc['linelengths'])
299 parents = cdf(desc['parents'])
299 parents = cdf(desc['parents'])
300 p1distance = cdf(desc['p1distance'])
300 p1distance = cdf(desc['p1distance'])
301 p2distance = cdf(desc['p2distance'])
301 p2distance = cdf(desc['p2distance'])
302 interarrival = cdf(desc['interarrival'])
302 interarrival = cdf(desc['interarrival'])
303 linesinfilesadded = cdf(desc['linesinfilesadded'])
303 linesinfilesadded = cdf(desc['linesinfilesadded'])
304 tzoffset = cdf(desc['tzoffset'])
304 tzoffset = cdf(desc['tzoffset'])
305
305
306 dictfile = opts.get('dict') or '/usr/share/dict/words'
306 dictfile = opts.get('dict') or '/usr/share/dict/words'
307 try:
307 try:
308 fp = open(dictfile, 'rU')
308 fp = open(dictfile, 'rU')
309 except IOError as err:
309 except IOError as err:
310 raise error.Abort('%s: %s' % (dictfile, err.strerror))
310 raise error.Abort('%s: %s' % (dictfile, err.strerror))
311 words = fp.read().splitlines()
311 words = fp.read().splitlines()
312 fp.close()
312 fp.close()
313
313
314 initdirs = {}
314 initdirs = {}
315 if desc['initdirs']:
315 if desc['initdirs']:
316 for k, v in desc['initdirs']:
316 for k, v in desc['initdirs']:
317 initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
317 initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
318 initdirs = renamedirs(initdirs, words)
318 initdirs = renamedirs(initdirs, words)
319 initdirscdf = cdf(initdirs)
319 initdirscdf = cdf(initdirs)
320
320
321 def pick(cdf):
321 def pick(cdf):
322 return cdf[0][bisect.bisect_left(cdf[1], random.random())]
322 return cdf[0][bisect.bisect_left(cdf[1], random.random())]
323
323
324 def pickpath():
324 def pickpath():
325 return os.path.join(pick(initdirscdf), random.choice(words))
325 return os.path.join(pick(initdirscdf), random.choice(words))
326
326
327 def makeline(minimum=0):
327 def makeline(minimum=0):
328 total = max(minimum, pick(linelengths))
328 total = max(minimum, pick(linelengths))
329 c, l = 0, []
329 c, l = 0, []
330 while c < total:
330 while c < total:
331 w = random.choice(words)
331 w = random.choice(words)
332 c += len(w) + 1
332 c += len(w) + 1
333 l.append(w)
333 l.append(w)
334 return ' '.join(l)
334 return ' '.join(l)
335
335
336 wlock = repo.wlock()
336 wlock = repo.wlock()
337 lock = repo.lock()
337 lock = repo.lock()
338
338
339 nevertouch = {'.hgsub', '.hgignore', '.hgtags'}
339 nevertouch = {'.hgsub', '.hgignore', '.hgtags'}
340
340
341 progress = ui.progress
341 progress = ui.progress
342 _synthesizing = _('synthesizing')
342 _synthesizing = _('synthesizing')
343 _files = _('initial files')
343 _files = _('initial files')
344 _changesets = _('changesets')
344 _changesets = _('changesets')
345
345
346 # Synthesize a single initial revision adding files to the repo according
346 # Synthesize a single initial revision adding files to the repo according
347 # to the modeled directory structure.
347 # to the modeled directory structure.
348 initcount = int(opts['initfiles'])
348 initcount = int(opts['initfiles'])
349 if initcount and initdirs:
349 if initcount and initdirs:
350 pctx = repo[None].parents()[0]
350 pctx = repo[None].parents()[0]
351 dirs = set(pctx.dirs())
351 dirs = set(pctx.dirs())
352 files = {}
352 files = {}
353
353
354 def validpath(path):
354 def validpath(path):
355 # Don't pick filenames which are already directory names.
355 # Don't pick filenames which are already directory names.
356 if path in dirs:
356 if path in dirs:
357 return False
357 return False
358 # Don't pick directories which were used as file names.
358 # Don't pick directories which were used as file names.
359 while path:
359 while path:
360 if path in files:
360 if path in files:
361 return False
361 return False
362 path = os.path.dirname(path)
362 path = os.path.dirname(path)
363 return True
363 return True
364
364
365 for i in xrange(0, initcount):
365 for i in xrange(0, initcount):
366 ui.progress(_synthesizing, i, unit=_files, total=initcount)
366 ui.progress(_synthesizing, i, unit=_files, total=initcount)
367
367
368 path = pickpath()
368 path = pickpath()
369 while not validpath(path):
369 while not validpath(path):
370 path = pickpath()
370 path = pickpath()
371 data = '%s contents\n' % path
371 data = '%s contents\n' % path
372 files[path] = context.memfilectx(repo, path, data)
372 files[path] = data
373 dir = os.path.dirname(path)
373 dir = os.path.dirname(path)
374 while dir and dir not in dirs:
374 while dir and dir not in dirs:
375 dirs.add(dir)
375 dirs.add(dir)
376 dir = os.path.dirname(dir)
376 dir = os.path.dirname(dir)
377
377
378 def filectxfn(repo, memctx, path):
378 def filectxfn(repo, memctx, path):
379 return files[path]
379 return context.memfilectx(repo, path, files[path])
380
380
381 ui.progress(_synthesizing, None)
381 ui.progress(_synthesizing, None)
382 message = 'synthesized wide repo with %d files' % (len(files),)
382 message = 'synthesized wide repo with %d files' % (len(files),)
383 mc = context.memctx(repo, [pctx.node(), nullid], message,
383 mc = context.memctx(repo, [pctx.node(), nullid], message,
384 files.iterkeys(), filectxfn, ui.username(),
384 files.iterkeys(), filectxfn, ui.username(),
385 '%d %d' % util.makedate())
385 '%d %d' % util.makedate())
386 initnode = mc.commit()
386 initnode = mc.commit()
387 if ui.debugflag:
387 if ui.debugflag:
388 hexfn = hex
388 hexfn = hex
389 else:
389 else:
390 hexfn = short
390 hexfn = short
391 ui.status(_('added commit %s with %d files\n')
391 ui.status(_('added commit %s with %d files\n')
392 % (hexfn(initnode), len(files)))
392 % (hexfn(initnode), len(files)))
393
393
394 # Synthesize incremental revisions to the repository, adding repo depth.
394 # Synthesize incremental revisions to the repository, adding repo depth.
395 count = int(opts['count'])
395 count = int(opts['count'])
396 heads = set(map(repo.changelog.rev, repo.heads()))
396 heads = set(map(repo.changelog.rev, repo.heads()))
397 for i in xrange(count):
397 for i in xrange(count):
398 progress(_synthesizing, i, unit=_changesets, total=count)
398 progress(_synthesizing, i, unit=_changesets, total=count)
399
399
400 node = repo.changelog.node
400 node = repo.changelog.node
401 revs = len(repo)
401 revs = len(repo)
402
402
403 def pickhead(heads, distance):
403 def pickhead(heads, distance):
404 if heads:
404 if heads:
405 lheads = sorted(heads)
405 lheads = sorted(heads)
406 rev = revs - min(pick(distance), revs)
406 rev = revs - min(pick(distance), revs)
407 if rev < lheads[-1]:
407 if rev < lheads[-1]:
408 rev = lheads[bisect.bisect_left(lheads, rev)]
408 rev = lheads[bisect.bisect_left(lheads, rev)]
409 else:
409 else:
410 rev = lheads[-1]
410 rev = lheads[-1]
411 return rev, node(rev)
411 return rev, node(rev)
412 return nullrev, nullid
412 return nullrev, nullid
413
413
414 r1 = revs - min(pick(p1distance), revs)
414 r1 = revs - min(pick(p1distance), revs)
415 p1 = node(r1)
415 p1 = node(r1)
416
416
417 # the number of heads will grow without bound if we use a pure
417 # the number of heads will grow without bound if we use a pure
418 # model, so artificially constrain their proliferation
418 # model, so artificially constrain their proliferation
419 toomanyheads = len(heads) > random.randint(1, 20)
419 toomanyheads = len(heads) > random.randint(1, 20)
420 if p2distance[0] and (pick(parents) == 2 or toomanyheads):
420 if p2distance[0] and (pick(parents) == 2 or toomanyheads):
421 r2, p2 = pickhead(heads.difference([r1]), p2distance)
421 r2, p2 = pickhead(heads.difference([r1]), p2distance)
422 else:
422 else:
423 r2, p2 = nullrev, nullid
423 r2, p2 = nullrev, nullid
424
424
425 pl = [p1, p2]
425 pl = [p1, p2]
426 pctx = repo[r1]
426 pctx = repo[r1]
427 mf = pctx.manifest()
427 mf = pctx.manifest()
428 mfk = mf.keys()
428 mfk = mf.keys()
429 changes = {}
429 changes = {}
430 if mfk:
430 if mfk:
431 for __ in xrange(pick(fileschanged)):
431 for __ in xrange(pick(fileschanged)):
432 for __ in xrange(10):
432 for __ in xrange(10):
433 fctx = pctx.filectx(random.choice(mfk))
433 fctx = pctx.filectx(random.choice(mfk))
434 path = fctx.path()
434 path = fctx.path()
435 if not (path in nevertouch or fctx.isbinary() or
435 if not (path in nevertouch or fctx.isbinary() or
436 'l' in fctx.flags()):
436 'l' in fctx.flags()):
437 break
437 break
438 lines = fctx.data().splitlines()
438 lines = fctx.data().splitlines()
439 add, remove = pick(lineschanged)
439 add, remove = pick(lineschanged)
440 for __ in xrange(remove):
440 for __ in xrange(remove):
441 if not lines:
441 if not lines:
442 break
442 break
443 del lines[random.randrange(0, len(lines))]
443 del lines[random.randrange(0, len(lines))]
444 for __ in xrange(add):
444 for __ in xrange(add):
445 lines.insert(random.randint(0, len(lines)), makeline())
445 lines.insert(random.randint(0, len(lines)), makeline())
446 path = fctx.path()
446 path = fctx.path()
447 changes[path] = context.memfilectx(repo, path,
447 changes[path] = '\n'.join(lines) + '\n'
448 '\n'.join(lines) + '\n')
449 for __ in xrange(pick(filesremoved)):
448 for __ in xrange(pick(filesremoved)):
450 path = random.choice(mfk)
449 path = random.choice(mfk)
451 for __ in xrange(10):
450 for __ in xrange(10):
452 path = random.choice(mfk)
451 path = random.choice(mfk)
453 if path not in changes:
452 if path not in changes:
454 changes[path] = None
455 break
453 break
456 if filesadded:
454 if filesadded:
457 dirs = list(pctx.dirs())
455 dirs = list(pctx.dirs())
458 dirs.insert(0, '')
456 dirs.insert(0, '')
459 for __ in xrange(pick(filesadded)):
457 for __ in xrange(pick(filesadded)):
460 pathstr = ''
458 pathstr = ''
461 while pathstr in dirs:
459 while pathstr in dirs:
462 path = [random.choice(dirs)]
460 path = [random.choice(dirs)]
463 if pick(dirsadded):
461 if pick(dirsadded):
464 path.append(random.choice(words))
462 path.append(random.choice(words))
465 path.append(random.choice(words))
463 path.append(random.choice(words))
466 pathstr = '/'.join(filter(None, path))
464 pathstr = '/'.join(filter(None, path))
467 data = '\n'.join(makeline()
465 data = '\n'.join(makeline()
468 for __ in xrange(pick(linesinfilesadded))) + '\n'
466 for __ in xrange(pick(linesinfilesadded))) + '\n'
469 changes[pathstr] = context.memfilectx(repo, pathstr, data)
467 changes[pathstr] = data
470 def filectxfn(repo, memctx, path):
468 def filectxfn(repo, memctx, path):
471 return changes[path]
469 if path not in changes:
470 return None
471 return context.memfilectx(repo, path, changes[path])
472 if not changes:
472 if not changes:
473 continue
473 continue
474 if revs:
474 if revs:
475 date = repo['tip'].date()[0] + pick(interarrival)
475 date = repo['tip'].date()[0] + pick(interarrival)
476 else:
476 else:
477 date = time.time() - (86400 * count)
477 date = time.time() - (86400 * count)
478 # dates in mercurial must be positive, fit in 32-bit signed integers.
478 # dates in mercurial must be positive, fit in 32-bit signed integers.
479 date = min(0x7fffffff, max(0, date))
479 date = min(0x7fffffff, max(0, date))
480 user = random.choice(words) + '@' + random.choice(words)
480 user = random.choice(words) + '@' + random.choice(words)
481 mc = context.memctx(repo, pl, makeline(minimum=2),
481 mc = context.memctx(repo, pl, makeline(minimum=2),
482 sorted(changes),
482 sorted(changes),
483 filectxfn, user, '%d %d' % (date, pick(tzoffset)))
483 filectxfn, user, '%d %d' % (date, pick(tzoffset)))
484 newnode = mc.commit()
484 newnode = mc.commit()
485 heads.add(repo.changelog.rev(newnode))
485 heads.add(repo.changelog.rev(newnode))
486 heads.discard(r1)
486 heads.discard(r1)
487 heads.discard(r2)
487 heads.discard(r2)
488
488
489 lock.release()
489 lock.release()
490 wlock.release()
490 wlock.release()
491
491
492 def renamedirs(dirs, words):
492 def renamedirs(dirs, words):
493 '''Randomly rename the directory names in the per-dir file count dict.'''
493 '''Randomly rename the directory names in the per-dir file count dict.'''
494 wordgen = itertools.cycle(words)
494 wordgen = itertools.cycle(words)
495 replacements = {'': ''}
495 replacements = {'': ''}
496 def rename(dirpath):
496 def rename(dirpath):
497 '''Recursively rename the directory and all path prefixes.
497 '''Recursively rename the directory and all path prefixes.
498
498
499 The mapping from path to renamed path is stored for all path prefixes
499 The mapping from path to renamed path is stored for all path prefixes
500 as in dynamic programming, ensuring linear runtime and consistent
500 as in dynamic programming, ensuring linear runtime and consistent
501 renaming regardless of iteration order through the model.
501 renaming regardless of iteration order through the model.
502 '''
502 '''
503 if dirpath in replacements:
503 if dirpath in replacements:
504 return replacements[dirpath]
504 return replacements[dirpath]
505 head, _ = os.path.split(dirpath)
505 head, _ = os.path.split(dirpath)
506 if head:
506 if head:
507 head = rename(head)
507 head = rename(head)
508 else:
508 else:
509 head = ''
509 head = ''
510 renamed = os.path.join(head, next(wordgen))
510 renamed = os.path.join(head, next(wordgen))
511 replacements[dirpath] = renamed
511 replacements[dirpath] = renamed
512 return renamed
512 return renamed
513 result = []
513 result = []
514 for dirpath, count in dirs.iteritems():
514 for dirpath, count in dirs.iteritems():
515 result.append([rename(dirpath.lstrip(os.sep)), count])
515 result.append([rename(dirpath.lstrip(os.sep)), count])
516 return result
516 return result
General Comments 0
You need to be logged in to leave comments. Login now