upstream/mercurial-mirror Commit - r22708:4c66e70c

contrib/synthrepo: generate initial repo contents using directory shape model...

Mike Edgar -

r22708:4c66e70c default

parent child

contrib/synthrepo.py

0 +70 -4

@@ -35,10 +35,10 A few obvious properties that are not cu
35	- Symlinks and binary files are ignored	35	- Symlinks and binary files are ignored
36	'''	36	'''
37		37
38	import bisect, collections, json, os, random, time, sys	38	import bisect, collections, itertools, json, os, random, time, sys
39	from mercurial import cmdutil, context, patch, scmutil, util, hg	39	from mercurial import cmdutil, context, patch, scmutil, util, hg
40	from mercurial.i18n import _	40	from mercurial.i18n import _
41	from mercurial.node import nullrev, nullid	41	from mercurial.node import nullrev, nullid, short
42		42
43	testedwith = 'internal'	43	testedwith = 'internal'
44		44
@@ -208,14 +208,17 def analyze(ui, repo, revs, *opts):
208		208
209	@command('synthesize',	209	@command('synthesize',
210	[('c', 'count', 0, _('create given number of commits'), _('COUNT')),	210	[('c', 'count', 0, _('create given number of commits'), _('COUNT')),
211	('', 'dict', '', _('path to a dictionary of words'), _('FILE'))],	211	('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
		212	('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
212	_('hg synthesize [OPTION].. DESCFILE'))	213	_('hg synthesize [OPTION].. DESCFILE'))
213	def synthesize(ui, repo, descpath, **opts):	214	def synthesize(ui, repo, descpath, **opts):
214	'''synthesize commits based on a model of an existing repository	215	'''synthesize commits based on a model of an existing repository
215		216
216	The model must have been generated by :hg:`analyze`. Commits will	217	The model must have been generated by :hg:`analyze`. Commits will
217	be generated randomly according to the probabilities described in	218	be generated randomly according to the probabilities described in
218	the model.	219	the model. If --initfiles is set, the repository will be seeded with
		220	the given number of files following the modeled repository's directory
		221	structure.
219		222
220	When synthesizing new content, commit descriptions, and user	223	When synthesizing new content, commit descriptions, and user
221	names, words will be chosen randomly from a dictionary that is	224	names, words will be chosen randomly from a dictionary that is
@@ -261,9 +264,19 def synthesize(ui, repo, descpath, **opt
261	words = fp.read().splitlines()	264	words = fp.read().splitlines()
262	fp.close()	265	fp.close()
263		266
		267	initdirs = {}
		268	if desc['initdirs']:
		269	for k, v in desc['initdirs']:
		270	initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
		271	initdirs = renamedirs(initdirs, words)
		272	initdirscdf = cdf(initdirs)
		273
264	def pick(cdf):	274	def pick(cdf):
265	return cdf[0][bisect.bisect_left(cdf[1], random.random())]	275	return cdf[0][bisect.bisect_left(cdf[1], random.random())]
266		276
		277	def pickpath():
		278	return os.path.join(pick(initdirscdf), random.choice(words))
		279
267	def makeline(minimum=0):	280	def makeline(minimum=0):
268	total = max(minimum, pick(linelengths))	281	total = max(minimum, pick(linelengths))
269	c, l = 0, []	282	c, l = 0, []
@@ -280,8 +293,38 def synthesize(ui, repo, descpath, **opt
280		293
281	progress = ui.progress	294	progress = ui.progress
282	_synthesizing = _('synthesizing')	295	_synthesizing = _('synthesizing')
		296	_files = _('initial files')
283	_changesets = _('changesets')	297	_changesets = _('changesets')
284		298
		299	# Synthesize a single initial revision adding files to the repo according
		300	# to the modeled directory structure.
		301	initcount = int(opts['initfiles'])
		302	if initcount and initdirs:
		303	pctx = repo[None].parents()[0]
		304	files = {}
		305	for i in xrange(0, initcount):
		306	ui.progress(_synthesizing, i, unit=_files, total=initcount)
		307
		308	path = pickpath()
		309	while path in pctx.dirs():
		310	path = pickpath()
		311	data = '%s contents\n' % path
		312	files[path] = context.memfilectx(repo, path, data)
		313
		314	def filectxfn(repo, memctx, path):
		315	return files[path]
		316
		317	ui.progress(_synthesizing, None)
		318	message = 'synthesized wide repo with %d files' % (len(files),)
		319	mc = context.memctx(repo, [pctx.node(), nullid], message,
		320	files.iterkeys(), filectxfn, ui.username(),
		321	'%d %d' % util.makedate())
		322	initnode = mc.commit()
		323	hexfn = ui.debugflag and hex or short
		324	ui.status(_('added commit %s with %d files\n')
		325	% (hexfn(initnode), len(files)))
		326
		327	# Synthesize incremental revisions to the repository, adding repo depth.
285	count = int(opts['count'])	328	count = int(opts['count'])
286	heads = set(map(repo.changelog.rev, repo.heads()))	329	heads = set(map(repo.changelog.rev, repo.heads()))
287	for i in xrange(count):	330	for i in xrange(count):
@@ -374,3 +417,26 def synthesize(ui, repo, descpath, **opt
374		417
375	lock.release()	418	lock.release()
376	wlock.release()	419	wlock.release()
		420
		421	def renamedirs(dirs, words):
		422	'''Randomly rename the directory names in the per-dir file count dict.'''
		423	wordgen = itertools.cycle(words)
		424	replacements = {'': ''}
		425	def rename(dirpath):
		426	'''Recursively rename the directory and all path prefixes.
		427
		428	The mapping from path to renamed path is stored for all path prefixes
		429	as in dynamic programming, ensuring linear runtime and consistent
		430	renaming regardless of iteration order through the model.
		431	'''
		432	if dirpath in replacements:
		433	return replacements[dirpath]
		434	head, _ = os.path.split(dirpath)
		435	head = head and rename(head) or ''
		436	renamed = os.path.join(head, wordgen.next())
		437	replacements[dirpath] = renamed
		438	return renamed
		439	result = []
		440	for dirpath, count in dirs.iteritems():
		441	result.append([rename(dirpath.lstrip(os.sep)), count])
		442	return result

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages