##// END OF EJS Templates
contrib/synthrepo: generate initial repo contents using directory shape model...
Mike Edgar -
r22708:4c66e70c default
parent child Browse files
Show More
@@ -35,10 +35,10 A few obvious properties that are not cu
35 35 - Symlinks and binary files are ignored
36 36 '''
37 37
38 import bisect, collections, json, os, random, time, sys
38 import bisect, collections, itertools, json, os, random, time, sys
39 39 from mercurial import cmdutil, context, patch, scmutil, util, hg
40 40 from mercurial.i18n import _
41 from mercurial.node import nullrev, nullid
41 from mercurial.node import nullrev, nullid, short
42 42
43 43 testedwith = 'internal'
44 44
@@ -208,14 +208,17 def analyze(ui, repo, *revs, **opts):
208 208
209 209 @command('synthesize',
210 210 [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
211 ('', 'dict', '', _('path to a dictionary of words'), _('FILE'))],
211 ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
212 ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
212 213 _('hg synthesize [OPTION].. DESCFILE'))
213 214 def synthesize(ui, repo, descpath, **opts):
214 215 '''synthesize commits based on a model of an existing repository
215 216
216 217 The model must have been generated by :hg:`analyze`. Commits will
217 218 be generated randomly according to the probabilities described in
218 the model.
219 the model. If --initfiles is set, the repository will be seeded with
220 the given number of files following the modeled repository's directory
221 structure.
219 222
220 223 When synthesizing new content, commit descriptions, and user
221 224 names, words will be chosen randomly from a dictionary that is
@@ -261,9 +264,19 def synthesize(ui, repo, descpath, **opt
261 264 words = fp.read().splitlines()
262 265 fp.close()
263 266
267 initdirs = {}
268 if desc['initdirs']:
269 for k, v in desc['initdirs']:
270 initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
271 initdirs = renamedirs(initdirs, words)
272 initdirscdf = cdf(initdirs)
273
264 274 def pick(cdf):
265 275 return cdf[0][bisect.bisect_left(cdf[1], random.random())]
266 276
277 def pickpath():
278 return os.path.join(pick(initdirscdf), random.choice(words))
279
267 280 def makeline(minimum=0):
268 281 total = max(minimum, pick(linelengths))
269 282 c, l = 0, []
@@ -280,8 +293,38 def synthesize(ui, repo, descpath, **opt
280 293
281 294 progress = ui.progress
282 295 _synthesizing = _('synthesizing')
296 _files = _('initial files')
283 297 _changesets = _('changesets')
284 298
299 # Synthesize a single initial revision adding files to the repo according
300 # to the modeled directory structure.
301 initcount = int(opts['initfiles'])
302 if initcount and initdirs:
303 pctx = repo[None].parents()[0]
304 files = {}
305 for i in xrange(0, initcount):
306 ui.progress(_synthesizing, i, unit=_files, total=initcount)
307
308 path = pickpath()
309 while path in pctx.dirs():
310 path = pickpath()
311 data = '%s contents\n' % path
312 files[path] = context.memfilectx(repo, path, data)
313
314 def filectxfn(repo, memctx, path):
315 return files[path]
316
317 ui.progress(_synthesizing, None)
318 message = 'synthesized wide repo with %d files' % (len(files),)
319 mc = context.memctx(repo, [pctx.node(), nullid], message,
320 files.iterkeys(), filectxfn, ui.username(),
321 '%d %d' % util.makedate())
322 initnode = mc.commit()
323 hexfn = ui.debugflag and hex or short
324 ui.status(_('added commit %s with %d files\n')
325 % (hexfn(initnode), len(files)))
326
327 # Synthesize incremental revisions to the repository, adding repo depth.
285 328 count = int(opts['count'])
286 329 heads = set(map(repo.changelog.rev, repo.heads()))
287 330 for i in xrange(count):
@@ -374,3 +417,26 def synthesize(ui, repo, descpath, **opt
374 417
375 418 lock.release()
376 419 wlock.release()
420
def renamedirs(dirs, words):
    '''Rename the directories in the per-dir file count dict.

    ``dirs`` maps a directory path to a count.  ``words`` supplies the
    replacement names for individual path components; they are consumed in
    order and reused cyclically once exhausted.

    Returns a list of ``[renamedpath, count]`` pairs, one per entry of
    ``dirs``, in the iteration order of ``dirs``.  Leading path separators
    are stripped from each directory before it is renamed.
    '''
    wordgen = itertools.cycle(words)
    # The empty (root) path maps to itself; this entry also terminates the
    # recursion in rename() once every path component has been consumed.
    replacements = {'': ''}

    def rename(dirpath):
        '''Recursively rename the directory and all path prefixes.

        The mapping from path to renamed path is stored for all path prefixes
        as in dynamic programming, ensuring linear runtime and consistent
        renaming regardless of iteration order through the model.
        '''
        if dirpath in replacements:
            return replacements[dirpath]
        # Rename the parent first so that siblings share one renamed prefix.
        head = os.path.dirname(dirpath)
        if head:
            head = rename(head)
        # The next() builtin (rather than the Python-2-only .next() method)
        # keeps this working on both Python 2.6+ and Python 3.
        renamed = os.path.join(head, next(wordgen))
        replacements[dirpath] = renamed
        return renamed

    # items() instead of the Python-2-only iteritems(); the leading separator
    # is stripped so the parent walk in rename() always ends at ''.
    return [[rename(dirpath.lstrip(os.sep)), count]
            for dirpath, count in dirs.items()]
General Comments 0
You need to be logged in to leave comments. Login now