##// END OF EJS Templates
contrib: synthrepo use absolute_import
Pulkit Goyal -
r28563:62250a48 default
parent child Browse files
Show More
@@ -1,495 +1,515
1 # synthrepo.py - repo synthesis
1 # synthrepo.py - repo synthesis
2 #
2 #
3 # Copyright 2012 Facebook
3 # Copyright 2012 Facebook
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 '''synthesize structurally interesting change history
8 '''synthesize structurally interesting change history
9
9
10 This extension is useful for creating a repository with properties
10 This extension is useful for creating a repository with properties
11 that are statistically similar to an existing repository. During
11 that are statistically similar to an existing repository. During
12 analysis, a simple probability table is constructed from the history
12 analysis, a simple probability table is constructed from the history
13 of an existing repository. During synthesis, these properties are
13 of an existing repository. During synthesis, these properties are
14 reconstructed.
14 reconstructed.
15
15
16 Properties that are analyzed and synthesized include the following:
16 Properties that are analyzed and synthesized include the following:
17
17
18 - Lines added or removed when an existing file is modified
18 - Lines added or removed when an existing file is modified
19 - Number and sizes of files added
19 - Number and sizes of files added
20 - Number of files removed
20 - Number of files removed
21 - Line lengths
21 - Line lengths
22 - Topological distance to parent changeset(s)
22 - Topological distance to parent changeset(s)
23 - Probability of a commit being a merge
23 - Probability of a commit being a merge
24 - Probability of a newly added file being added to a new directory
24 - Probability of a newly added file being added to a new directory
25 - Interarrival time, and time zone, of commits
25 - Interarrival time, and time zone, of commits
26 - Number of files in each directory
26 - Number of files in each directory
27
27
28 A few obvious properties that are not currently handled realistically:
28 A few obvious properties that are not currently handled realistically:
29
29
30 - Merges are treated as regular commits with two parents, which is not
30 - Merges are treated as regular commits with two parents, which is not
31 realistic
31 realistic
32 - Modifications are not treated as operations on hunks of lines, but
32 - Modifications are not treated as operations on hunks of lines, but
33 as insertions and deletions of randomly chosen single lines
33 as insertions and deletions of randomly chosen single lines
34 - Committer ID (always random)
34 - Committer ID (always random)
35 - Executability of files
35 - Executability of files
36 - Symlinks and binary files are ignored
36 - Symlinks and binary files are ignored
37 '''
37 '''
38
38
39 import bisect, collections, itertools, json, os, random, time, sys
39 from __future__ import absolute_import
40 from mercurial import cmdutil, context, patch, scmutil, util, hg, error
40 import bisect
41 import collections
42 import itertools
43 import json
44 import os
45 import random
46 import sys
47 import time
48 from mercurial import (
49 cmdutil,
50 context,
51 error,
52 hg,
53 patch,
54 scmutil,
55 util,
56 )
41 from mercurial.i18n import _
57 from mercurial.i18n import _
42 from mercurial.node import nullrev, nullid, short
58 from mercurial.node import (
59 nullid,
60 nullrev,
61 short,
62 )
43
63
# Note for extension authors: ONLY specify testedwith = 'internal' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'internal'

# Command table populated by the @command decorator below.
cmdtable = {}
command = cmdutil.command(cmdtable)

# First six characters of git-diff header lines that introduce a new file.
newfile = set(['new fi', 'rename', 'copy f', 'copy t'])
54
74
def zerodict():
    '''Return a counting dict whose missing keys read as 0.'''
    return collections.defaultdict(int)
57
77
def roundto(x, k):
    '''Round x to the nearest integer when it is small; once x exceeds
    2*k, round it to the nearest multiple of k instead (coarse binning
    for the histograms built by analyze).'''
    if x <= k * 2:
        return int(round(x))
    return int(round(x / float(k)) * k)
62
82
def parsegitdiff(lines):
    '''Walk a git-style diff and yield one summary tuple per file:
    (filename, mar, lineadd, lineremove, binary), where mar is 'm'
    (modified), 'a' (added) or 'r' (removed), lineadd maps rounded
    added-line lengths to counts, and lineremove counts removed lines.'''
    fname = None
    mar = None
    added = zerodict()
    removed = 0
    isbin = False
    for line in lines:
        prefix = line[:6]
        if prefix == 'diff -':
            # New per-file section: flush whatever was accumulated so far.
            if fname:
                yield fname, mar, added, removed, isbin
            mar, added, removed, isbin = 'm', zerodict(), 0, False
            fname = patch.gitre.match(line).group(1)
        elif prefix in newfile:
            mar = 'a'
        elif prefix == 'GIT bi':
            isbin = True
        elif prefix == 'delete':
            mar = 'r'
        elif prefix:
            ch = prefix[0]
            # Hunk body: count removals, and bucket additions by length,
            # skipping the '---'/'+++' header lines.
            if ch == '-' and not line.startswith('--- '):
                removed += 1
            elif ch == '+' and not line.startswith('+++ '):
                added[roundto(len(line) - 1, 5)] += 1
    # Flush the final file section, if any.
    if fname:
        yield fname, mar, added, removed, isbin
87
107
@command('analyze',
         [('o', 'output', '', _('write output to given file'), _('FILE')),
          ('r', 'rev', [], _('analyze specified revisions'), _('REV'))],
         _('hg analyze'), optionalrepo=True)
def analyze(ui, repo, *revs, **opts):
    '''create a simple model of a repository to use for later synthesis

    This command examines every changeset in the given range (or all
    of history if none are specified) and creates a simple statistical
    model of the history of the repository. It also measures the directory
    structure of the repository as checked out.

    The model is written out to a JSON file, and can be used by
    :hg:`synthesize` to create or augment a repository with synthetic
    commits that have a structure that is statistically similar to the
    analyzed repository.
    '''
    root = repo.root
    if not root.endswith(os.path.sep):
        root += os.path.sep

    revs = list(revs)
    revs.extend(opts['rev'])
    if not revs:
        revs = [':']

    output = opts['output']
    if not output:
        output = os.path.basename(root) + '.json'

    if output == '-':
        fp = sys.stdout
    else:
        fp = open(output, 'w')

    # Always obtain file counts of each directory in the given root directory.
    def onerror(e):
        ui.warn(_('error walking directory structure: %s\n') % e)

    dirs = {}
    rootprefixlen = len(root)
    for dirpath, dirnames, filenames in os.walk(root, onerror=onerror):
        dirpathfromroot = dirpath[rootprefixlen:]
        dirs[dirpathfromroot] = len(filenames)
        # Never descend into the repository's own metadata directory.
        if '.hg' in dirnames:
            dirnames.remove('.hg')

    lineschanged = zerodict()
    children = zerodict()
    p1distance = zerodict()
    p2distance = zerodict()
    linesinfilesadded = zerodict()
    fileschanged = zerodict()
    filesadded = zerodict()
    filesremoved = zerodict()
    linelengths = zerodict()
    interarrival = zerodict()
    parents = zerodict()
    dirsadded = zerodict()
    tzoffset = zerodict()

    # If a mercurial repo is available, also model the commit history.
    if repo:
        revs = scmutil.revrange(repo, revs)
        revs.sort()

        progress = ui.progress
        _analyzing = _('analyzing')
        _changesets = _('changesets')
        _total = len(revs)

        for i, rev in enumerate(revs):
            progress(_analyzing, i, unit=_changesets, total=_total)
            ctx = repo[rev]
            pl = ctx.parents()
            pctx = pl[0]
            prev = pctx.rev()
            children[prev] += 1
            p1distance[rev - prev] += 1
            parents[len(pl)] += 1
            tzoffset[ctx.date()[1]] += 1
            if len(pl) > 1:
                p2distance[rev - pl[1].rev()] += 1
            if prev == rev - 1:
                lastctx = pctx
            else:
                lastctx = repo[rev - 1]
            if lastctx.rev() != nullrev:
                timedelta = ctx.date()[0] - lastctx.date()[0]
                interarrival[roundto(timedelta, 300)] += 1
            diff = sum((d.splitlines() for d in ctx.diff(pctx, git=True)), [])
            fileadds, diradds, fileremoves, filechanges = 0, 0, 0, 0
            for filename, mar, lineadd, lineremove, isbin in parsegitdiff(diff):
                if isbin:
                    continue
                added = sum(lineadd.itervalues(), 0)
                if mar == 'm':
                    if added and lineremove:
                        lineschanged[roundto(added, 5),
                                     roundto(lineremove, 5)] += 1
                        filechanges += 1
                elif mar == 'a':
                    fileadds += 1
                    if '/' in filename:
                        filedir = filename.rsplit('/', 1)[0]
                        if filedir not in pctx.dirs():
                            diradds += 1
                    linesinfilesadded[roundto(added, 5)] += 1
                elif mar == 'r':
                    fileremoves += 1
                for length, count in lineadd.iteritems():
                    linelengths[length] += count
            fileschanged[filechanges] += 1
            filesadded[fileadds] += 1
            dirsadded[diradds] += 1
            filesremoved[fileremoves] += 1

    invchildren = zerodict()

    for rev, count in children.iteritems():
        invchildren[count] += 1

    if output != '-':
        ui.status(_('writing output to %s\n') % output)

    def pronk(d):
        # Histogram dict -> (value, frequency) pairs, most frequent first.
        return sorted(d.iteritems(), key=lambda x: x[1], reverse=True)

    json.dump({'revs': len(revs),
               'initdirs': pronk(dirs),
               'lineschanged': pronk(lineschanged),
               'children': pronk(invchildren),
               'fileschanged': pronk(fileschanged),
               'filesadded': pronk(filesadded),
               'linesinfilesadded': pronk(linesinfilesadded),
               'dirsadded': pronk(dirsadded),
               'filesremoved': pronk(filesremoved),
               'linelengths': pronk(linelengths),
               'parents': pronk(parents),
               'p1distance': pronk(p1distance),
               'p2distance': pronk(p2distance),
               'interarrival': pronk(interarrival),
               'tzoffset': pronk(tzoffset),
               },
              fp)
    # Only close handles we opened ourselves: with -o -, fp is sys.stdout
    # and the old unconditional close() silenced all further output.
    if fp is not sys.stdout:
        fp.close()
234
254
@command('synthesize',
         [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
          ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
          ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
         _('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
    '''synthesize commits based on a model of an existing repository

    The model must have been generated by :hg:`analyze`. Commits will
    be generated randomly according to the probabilities described in
    the model. If --initfiles is set, the repository will be seeded with
    the given number files following the modeled repository's directory
    structure.

    When synthesizing new content, commit descriptions, and user
    names, words will be chosen randomly from a dictionary that is
    presumed to contain one word per line. Use --dict to specify the
    path to an alternate dictionary to use.
    '''
    try:
        fp = hg.openpath(ui, descpath)
    except Exception as err:
        # err.strerror, not err[0].strerror: indexing the exception yields
        # its first argument (the errno integer for an IOError), and an int
        # has no strerror attribute, so the old code crashed while trying
        # to report the error.  This also matches the dictfile handler below.
        raise error.Abort('%s: %s' % (descpath, err.strerror))
    desc = json.load(fp)
    fp.close()

    def cdf(l):
        '''Turn a [value, frequency] list into a (values, cumulative
        probabilities) pair suitable for pick() below.'''
        if not l:
            return [], []
        vals, probs = zip(*sorted(l, key=lambda x: x[1], reverse=True))
        t = float(sum(probs, 0))
        s, cdfs = 0, []
        for v in probs:
            s += v
            cdfs.append(s / t)
        return vals, cdfs

    lineschanged = cdf(desc['lineschanged'])
    fileschanged = cdf(desc['fileschanged'])
    filesadded = cdf(desc['filesadded'])
    dirsadded = cdf(desc['dirsadded'])
    filesremoved = cdf(desc['filesremoved'])
    linelengths = cdf(desc['linelengths'])
    parents = cdf(desc['parents'])
    p1distance = cdf(desc['p1distance'])
    p2distance = cdf(desc['p2distance'])
    interarrival = cdf(desc['interarrival'])
    linesinfilesadded = cdf(desc['linesinfilesadded'])
    tzoffset = cdf(desc['tzoffset'])

    dictfile = opts.get('dict') or '/usr/share/dict/words'
    try:
        fp = open(dictfile, 'rU')
    except IOError as err:
        raise error.Abort('%s: %s' % (dictfile, err.strerror))
    words = fp.read().splitlines()
    fp.close()

    initdirs = {}
    if desc['initdirs']:
        for k, v in desc['initdirs']:
            # Never create paths inside a real '.hg' directory.
            initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
        initdirs = renamedirs(initdirs, words)
    initdirscdf = cdf(initdirs)

    def pick(cdf):
        # Sample one value from a (values, cumulative-probabilities) pair.
        return cdf[0][bisect.bisect_left(cdf[1], random.random())]

    def pickpath():
        return os.path.join(pick(initdirscdf), random.choice(words))

    def makeline(minimum=0):
        # Build a random line of at least `minimum` characters from words.
        total = max(minimum, pick(linelengths))
        c, l = 0, []
        while c < total:
            w = random.choice(words)
            c += len(w) + 1
            l.append(w)
        return ' '.join(l)

    wlock = repo.wlock()
    lock = repo.lock()
    try:
        nevertouch = set(('.hgsub', '.hgignore', '.hgtags'))

        progress = ui.progress
        _synthesizing = _('synthesizing')
        _files = _('initial files')
        _changesets = _('changesets')

        # Synthesize a single initial revision adding files to the repo
        # according to the modeled directory structure.
        initcount = int(opts['initfiles'])
        if initcount and initdirs:
            pctx = repo[None].parents()[0]
            dirs = set(pctx.dirs())
            files = {}

            def validpath(path):
                # Don't pick filenames which are already directory names.
                if path in dirs:
                    return False
                # Don't pick directories which were used as file names.
                while path:
                    if path in files:
                        return False
                    path = os.path.dirname(path)
                return True

            for i in xrange(0, initcount):
                ui.progress(_synthesizing, i, unit=_files, total=initcount)

                path = pickpath()
                while not validpath(path):
                    path = pickpath()
                data = '%s contents\n' % path
                files[path] = context.memfilectx(repo, path, data)
                dir = os.path.dirname(path)
                while dir and dir not in dirs:
                    dirs.add(dir)
                    dir = os.path.dirname(dir)

            def filectxfn(repo, memctx, path):
                return files[path]

            ui.progress(_synthesizing, None)
            message = 'synthesized wide repo with %d files' % (len(files),)
            mc = context.memctx(repo, [pctx.node(), nullid], message,
                                files.iterkeys(), filectxfn, ui.username(),
                                '%d %d' % util.makedate())
            initnode = mc.commit()
            if ui.debugflag:
                # mercurial.node.hex, not the builtin hex(): initnode is a
                # binary nodeid, which the builtin cannot format.
                hexfn = hex
            else:
                hexfn = short
            ui.status(_('added commit %s with %d files\n')
                      % (hexfn(initnode), len(files)))

        # Synthesize incremental revisions to the repository, adding repo
        # depth.
        count = int(opts['count'])
        heads = set(map(repo.changelog.rev, repo.heads()))
        for i in xrange(count):
            progress(_synthesizing, i, unit=_changesets, total=count)

            node = repo.changelog.node
            revs = len(repo)

            def pickhead(heads, distance):
                # Choose an existing head roughly `distance` revisions back.
                if heads:
                    lheads = sorted(heads)
                    rev = revs - min(pick(distance), revs)
                    if rev < lheads[-1]:
                        rev = lheads[bisect.bisect_left(lheads, rev)]
                    else:
                        rev = lheads[-1]
                    return rev, node(rev)
                return nullrev, nullid

            r1 = revs - min(pick(p1distance), revs)
            p1 = node(r1)

            # the number of heads will grow without bound if we use a pure
            # model, so artificially constrain their proliferation
            toomanyheads = len(heads) > random.randint(1, 20)
            if p2distance[0] and (pick(parents) == 2 or toomanyheads):
                r2, p2 = pickhead(heads.difference([r1]), p2distance)
            else:
                r2, p2 = nullrev, nullid

            pl = [p1, p2]
            pctx = repo[r1]
            mf = pctx.manifest()
            mfk = mf.keys()
            changes = {}
            if mfk:
                for __ in xrange(pick(fileschanged)):
                    for __ in xrange(10):
                        fctx = pctx.filectx(random.choice(mfk))
                        path = fctx.path()
                        if not (path in nevertouch or fctx.isbinary() or
                                'l' in fctx.flags()):
                            break
                    lines = fctx.data().splitlines()
                    add, remove = pick(lineschanged)
                    for __ in xrange(remove):
                        if not lines:
                            break
                        del lines[random.randrange(0, len(lines))]
                    for __ in xrange(add):
                        lines.insert(random.randint(0, len(lines)), makeline())
                    path = fctx.path()
                    changes[path] = context.memfilectx(repo, path,
                                                       '\n'.join(lines) + '\n')
                for __ in xrange(pick(filesremoved)):
                    # (the old pre-loop "path = random.choice(mfk)" was dead
                    # code: the loop below always overwrote it)
                    for __ in xrange(10):
                        path = random.choice(mfk)
                        if path not in changes:
                            changes[path] = None
                            break
            if filesadded:
                dirs = list(pctx.dirs())
                dirs.insert(0, '')
            for __ in xrange(pick(filesadded)):
                pathstr = ''
                while pathstr in dirs:
                    path = [random.choice(dirs)]
                    if pick(dirsadded):
                        path.append(random.choice(words))
                    path.append(random.choice(words))
                    pathstr = '/'.join(filter(None, path))
                data = '\n'.join(makeline()
                                 for __ in xrange(pick(linesinfilesadded))) + '\n'
                changes[pathstr] = context.memfilectx(repo, pathstr, data)

            def filectxfn(repo, memctx, path):
                return changes[path]
            if not changes:
                continue
            if revs:
                date = repo['tip'].date()[0] + pick(interarrival)
            else:
                date = time.time() - (86400 * count)
            # dates in mercurial must be positive, fit in 32-bit signed
            # integers.
            date = min(0x7fffffff, max(0, date))
            user = random.choice(words) + '@' + random.choice(words)
            mc = context.memctx(repo, pl, makeline(minimum=2),
                                sorted(changes.iterkeys()),
                                filectxfn, user,
                                '%d %d' % (date, pick(tzoffset)))
            newnode = mc.commit()
            heads.add(repo.changelog.rev(newnode))
            heads.discard(r1)
            heads.discard(r2)
    finally:
        # Always release locks, innermost first, even when synthesis aborts;
        # previously an exception left the repository locked.
        lock.release()
        wlock.release()
470
490
def renamedirs(dirs, words):
    '''Randomly rename the directory names in the per-dir file count dict.

    Returns a list of [renamed-path, count] pairs.  Uses next(wordgen) and
    dirs.items() instead of the Python 2-only wordgen.next() and
    dirs.iteritems() spellings; both behave identically on Python 2.6+
    and additionally work on Python 3.
    '''
    wordgen = itertools.cycle(words)
    replacements = {'': ''}

    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        # Index instead of tuple-unpacking into "_", which shadowed the
        # gettext helper of the same name inside this function.
        head = os.path.split(dirpath)[0]
        if head:
            head = rename(head)
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed

    return [[rename(dirpath.lstrip(os.sep)), count]
            for dirpath, count in dirs.items()]
@@ -1,123 +1,122
1 #require test-repo
1 #require test-repo
2
2
3 $ cd "$TESTDIR"/..
3 $ cd "$TESTDIR"/..
4
4
5 $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs python contrib/check-py3-compat.py
5 $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs python contrib/check-py3-compat.py
6 contrib/import-checker.py not using absolute_import
6 contrib/import-checker.py not using absolute_import
7 contrib/import-checker.py requires print_function
7 contrib/import-checker.py requires print_function
8 contrib/revsetbenchmarks.py not using absolute_import
8 contrib/revsetbenchmarks.py not using absolute_import
9 contrib/revsetbenchmarks.py requires print_function
9 contrib/revsetbenchmarks.py requires print_function
10 contrib/synthrepo.py not using absolute_import
11 doc/check-seclevel.py not using absolute_import
10 doc/check-seclevel.py not using absolute_import
12 doc/gendoc.py not using absolute_import
11 doc/gendoc.py not using absolute_import
13 doc/hgmanpage.py not using absolute_import
12 doc/hgmanpage.py not using absolute_import
14 hgext/color.py not using absolute_import
13 hgext/color.py not using absolute_import
15 hgext/eol.py not using absolute_import
14 hgext/eol.py not using absolute_import
16 hgext/extdiff.py not using absolute_import
15 hgext/extdiff.py not using absolute_import
17 hgext/factotum.py not using absolute_import
16 hgext/factotum.py not using absolute_import
18 hgext/fetch.py not using absolute_import
17 hgext/fetch.py not using absolute_import
19 hgext/fsmonitor/pywatchman/__init__.py not using absolute_import
18 hgext/fsmonitor/pywatchman/__init__.py not using absolute_import
20 hgext/fsmonitor/pywatchman/__init__.py requires print_function
19 hgext/fsmonitor/pywatchman/__init__.py requires print_function
21 hgext/fsmonitor/pywatchman/capabilities.py not using absolute_import
20 hgext/fsmonitor/pywatchman/capabilities.py not using absolute_import
22 hgext/fsmonitor/pywatchman/pybser.py not using absolute_import
21 hgext/fsmonitor/pywatchman/pybser.py not using absolute_import
23 hgext/gpg.py not using absolute_import
22 hgext/gpg.py not using absolute_import
24 hgext/graphlog.py not using absolute_import
23 hgext/graphlog.py not using absolute_import
25 hgext/hgcia.py not using absolute_import
24 hgext/hgcia.py not using absolute_import
26 hgext/hgk.py not using absolute_import
25 hgext/hgk.py not using absolute_import
27 hgext/highlight/__init__.py not using absolute_import
26 hgext/highlight/__init__.py not using absolute_import
28 hgext/highlight/highlight.py not using absolute_import
27 hgext/highlight/highlight.py not using absolute_import
29 hgext/histedit.py not using absolute_import
28 hgext/histedit.py not using absolute_import
30 hgext/largefiles/__init__.py not using absolute_import
29 hgext/largefiles/__init__.py not using absolute_import
31 hgext/largefiles/basestore.py not using absolute_import
30 hgext/largefiles/basestore.py not using absolute_import
32 hgext/largefiles/lfcommands.py not using absolute_import
31 hgext/largefiles/lfcommands.py not using absolute_import
33 hgext/largefiles/lfutil.py not using absolute_import
32 hgext/largefiles/lfutil.py not using absolute_import
34 hgext/largefiles/localstore.py not using absolute_import
33 hgext/largefiles/localstore.py not using absolute_import
35 hgext/largefiles/overrides.py not using absolute_import
34 hgext/largefiles/overrides.py not using absolute_import
36 hgext/largefiles/proto.py not using absolute_import
35 hgext/largefiles/proto.py not using absolute_import
37 hgext/largefiles/remotestore.py not using absolute_import
36 hgext/largefiles/remotestore.py not using absolute_import
38 hgext/largefiles/reposetup.py not using absolute_import
37 hgext/largefiles/reposetup.py not using absolute_import
39 hgext/largefiles/uisetup.py not using absolute_import
38 hgext/largefiles/uisetup.py not using absolute_import
40 hgext/largefiles/wirestore.py not using absolute_import
39 hgext/largefiles/wirestore.py not using absolute_import
41 hgext/mq.py not using absolute_import
40 hgext/mq.py not using absolute_import
42 hgext/rebase.py not using absolute_import
41 hgext/rebase.py not using absolute_import
43 hgext/share.py not using absolute_import
42 hgext/share.py not using absolute_import
44 hgext/win32text.py not using absolute_import
43 hgext/win32text.py not using absolute_import
45 i18n/check-translation.py not using absolute_import
44 i18n/check-translation.py not using absolute_import
46 i18n/polib.py not using absolute_import
45 i18n/polib.py not using absolute_import
47 setup.py not using absolute_import
46 setup.py not using absolute_import
48 tests/filterpyflakes.py requires print_function
47 tests/filterpyflakes.py requires print_function
49 tests/generate-working-copy-states.py requires print_function
48 tests/generate-working-copy-states.py requires print_function
50 tests/get-with-headers.py requires print_function
49 tests/get-with-headers.py requires print_function
51 tests/heredoctest.py requires print_function
50 tests/heredoctest.py requires print_function
52 tests/hypothesishelpers.py not using absolute_import
51 tests/hypothesishelpers.py not using absolute_import
53 tests/hypothesishelpers.py requires print_function
52 tests/hypothesishelpers.py requires print_function
54 tests/killdaemons.py not using absolute_import
53 tests/killdaemons.py not using absolute_import
55 tests/md5sum.py not using absolute_import
54 tests/md5sum.py not using absolute_import
56 tests/mockblackbox.py not using absolute_import
55 tests/mockblackbox.py not using absolute_import
57 tests/printenv.py not using absolute_import
56 tests/printenv.py not using absolute_import
58 tests/readlink.py not using absolute_import
57 tests/readlink.py not using absolute_import
59 tests/readlink.py requires print_function
58 tests/readlink.py requires print_function
60 tests/revlog-formatv0.py not using absolute_import
59 tests/revlog-formatv0.py not using absolute_import
61 tests/run-tests.py not using absolute_import
60 tests/run-tests.py not using absolute_import
62 tests/seq.py not using absolute_import
61 tests/seq.py not using absolute_import
63 tests/seq.py requires print_function
62 tests/seq.py requires print_function
64 tests/silenttestrunner.py not using absolute_import
63 tests/silenttestrunner.py not using absolute_import
65 tests/silenttestrunner.py requires print_function
64 tests/silenttestrunner.py requires print_function
66 tests/sitecustomize.py not using absolute_import
65 tests/sitecustomize.py not using absolute_import
67 tests/svn-safe-append.py not using absolute_import
66 tests/svn-safe-append.py not using absolute_import
68 tests/svnxml.py not using absolute_import
67 tests/svnxml.py not using absolute_import
69 tests/test-ancestor.py requires print_function
68 tests/test-ancestor.py requires print_function
70 tests/test-atomictempfile.py not using absolute_import
69 tests/test-atomictempfile.py not using absolute_import
71 tests/test-batching.py not using absolute_import
70 tests/test-batching.py not using absolute_import
72 tests/test-batching.py requires print_function
71 tests/test-batching.py requires print_function
73 tests/test-bdiff.py not using absolute_import
72 tests/test-bdiff.py not using absolute_import
74 tests/test-bdiff.py requires print_function
73 tests/test-bdiff.py requires print_function
75 tests/test-context.py not using absolute_import
74 tests/test-context.py not using absolute_import
76 tests/test-context.py requires print_function
75 tests/test-context.py requires print_function
77 tests/test-demandimport.py not using absolute_import
76 tests/test-demandimport.py not using absolute_import
78 tests/test-demandimport.py requires print_function
77 tests/test-demandimport.py requires print_function
79 tests/test-doctest.py not using absolute_import
78 tests/test-doctest.py not using absolute_import
80 tests/test-duplicateoptions.py not using absolute_import
79 tests/test-duplicateoptions.py not using absolute_import
81 tests/test-duplicateoptions.py requires print_function
80 tests/test-duplicateoptions.py requires print_function
82 tests/test-filecache.py not using absolute_import
81 tests/test-filecache.py not using absolute_import
83 tests/test-filecache.py requires print_function
82 tests/test-filecache.py requires print_function
84 tests/test-filelog.py not using absolute_import
83 tests/test-filelog.py not using absolute_import
85 tests/test-filelog.py requires print_function
84 tests/test-filelog.py requires print_function
86 tests/test-hg-parseurl.py not using absolute_import
85 tests/test-hg-parseurl.py not using absolute_import
87 tests/test-hg-parseurl.py requires print_function
86 tests/test-hg-parseurl.py requires print_function
88 tests/test-hgweb-auth.py not using absolute_import
87 tests/test-hgweb-auth.py not using absolute_import
89 tests/test-hgweb-auth.py requires print_function
88 tests/test-hgweb-auth.py requires print_function
90 tests/test-hgwebdir-paths.py not using absolute_import
89 tests/test-hgwebdir-paths.py not using absolute_import
91 tests/test-hybridencode.py not using absolute_import
90 tests/test-hybridencode.py not using absolute_import
92 tests/test-hybridencode.py requires print_function
91 tests/test-hybridencode.py requires print_function
93 tests/test-lrucachedict.py not using absolute_import
92 tests/test-lrucachedict.py not using absolute_import
94 tests/test-lrucachedict.py requires print_function
93 tests/test-lrucachedict.py requires print_function
95 tests/test-manifest.py not using absolute_import
94 tests/test-manifest.py not using absolute_import
96 tests/test-minirst.py not using absolute_import
95 tests/test-minirst.py not using absolute_import
97 tests/test-minirst.py requires print_function
96 tests/test-minirst.py requires print_function
98 tests/test-parseindex2.py not using absolute_import
97 tests/test-parseindex2.py not using absolute_import
99 tests/test-parseindex2.py requires print_function
98 tests/test-parseindex2.py requires print_function
100 tests/test-pathencode.py not using absolute_import
99 tests/test-pathencode.py not using absolute_import
101 tests/test-pathencode.py requires print_function
100 tests/test-pathencode.py requires print_function
102 tests/test-propertycache.py not using absolute_import
101 tests/test-propertycache.py not using absolute_import
103 tests/test-propertycache.py requires print_function
102 tests/test-propertycache.py requires print_function
104 tests/test-revlog-ancestry.py not using absolute_import
103 tests/test-revlog-ancestry.py not using absolute_import
105 tests/test-revlog-ancestry.py requires print_function
104 tests/test-revlog-ancestry.py requires print_function
106 tests/test-run-tests.py not using absolute_import
105 tests/test-run-tests.py not using absolute_import
107 tests/test-simplemerge.py not using absolute_import
106 tests/test-simplemerge.py not using absolute_import
108 tests/test-status-inprocess.py not using absolute_import
107 tests/test-status-inprocess.py not using absolute_import
109 tests/test-status-inprocess.py requires print_function
108 tests/test-status-inprocess.py requires print_function
110 tests/test-symlink-os-yes-fs-no.py not using absolute_import
109 tests/test-symlink-os-yes-fs-no.py not using absolute_import
111 tests/test-trusted.py not using absolute_import
110 tests/test-trusted.py not using absolute_import
112 tests/test-trusted.py requires print_function
111 tests/test-trusted.py requires print_function
113 tests/test-ui-color.py not using absolute_import
112 tests/test-ui-color.py not using absolute_import
114 tests/test-ui-color.py requires print_function
113 tests/test-ui-color.py requires print_function
115 tests/test-ui-config.py not using absolute_import
114 tests/test-ui-config.py not using absolute_import
116 tests/test-ui-config.py requires print_function
115 tests/test-ui-config.py requires print_function
117 tests/test-ui-verbosity.py not using absolute_import
116 tests/test-ui-verbosity.py not using absolute_import
118 tests/test-ui-verbosity.py requires print_function
117 tests/test-ui-verbosity.py requires print_function
119 tests/test-url.py not using absolute_import
118 tests/test-url.py not using absolute_import
120 tests/test-url.py requires print_function
119 tests/test-url.py requires print_function
121 tests/test-walkrepo.py requires print_function
120 tests/test-walkrepo.py requires print_function
122 tests/test-wireproto.py requires print_function
121 tests/test-wireproto.py requires print_function
123 tests/tinyproxy.py requires print_function
122 tests/tinyproxy.py requires print_function
General Comments 0
You need to be logged in to leave comments. Login now