upstream/mercurial-mirror Commit - r22708:4c66e70c

contrib/synthrepo: generate initial repo contents using directory shape model...

Mike Edgar -

r22708:4c66e70c default

parent child

contrib/synthrepo.py

0 +70 -4

              - Symlinks and binary files are ignored
              '''
-             import bisect, collections, json, os, random, time, sys
+             import bisect, collections, itertools, json, os, random, time, sys
              from mercurial import cmdutil, context, patch, scmutil, util, hg
              from mercurial.i18n import _
-             from mercurial.node import nullrev, nullid
+             from mercurial.node import nullrev, nullid, short
              testedwith = 'internal'
              @command('synthesize',
                       [('c', 'count', 0, _('create given number of commits'), _('COUNT')),
-                       ('', 'dict', '', _('path to a dictionary of words'), _('FILE'))],
+                       ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
+                       ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
                       _('hg synthesize [OPTION].. DESCFILE'))
              def synthesize(ui, repo, descpath, **opts):
                  '''synthesize commits based on a model of an existing repository
                  The model must have been generated by :hg:`analyze`. Commits will
                  be generated randomly according to the probabilities described in
-                 the model.
+                 the model. If --initfiles is set, the repository will be seeded with
+                 the given number of files following the modeled repository's directory
+                 structure.
                  When synthesizing new content, commit descriptions, and user
                  names, words will be chosen randomly from a dictionary that is
                  words = fp.read().splitlines()
                  fp.close()
+                 initdirs = {}
+                 if desc['initdirs']:
+                     for k, v in desc['initdirs']:
+                         initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
+                     initdirs = renamedirs(initdirs, words)
+                 initdirscdf = cdf(initdirs)
                  def pick(cdf):
                      return cdf[0][bisect.bisect_left(cdf[1], random.random())]
+                 def pickpath():
+                     return os.path.join(pick(initdirscdf), random.choice(words))
                  def makeline(minimum=0):
                      total = max(minimum, pick(linelengths))
                      c, l = 0, []
                  progress = ui.progress
                  _synthesizing = _('synthesizing')
+                 _files = _('initial files')
                  _changesets = _('changesets')
+                 # Synthesize a single initial revision adding files to the repo according
+                 # to the modeled directory structure.
+                 initcount = int(opts['initfiles'])
+                 if initcount and initdirs:
+                     pctx = repo[None].parents()[0]
+                     files = {}
+                     for i in xrange(0, initcount):
+                         ui.progress(_synthesizing, i, unit=_files, total=initcount)
+                         path = pickpath()
+                         while path in pctx.dirs():
+                             path = pickpath()
+                         data = '%s contents\n' % path
+                         files[path] = context.memfilectx(repo, path, data)
+                     def filectxfn(repo, memctx, path):
+                         return files[path]
+                     ui.progress(_synthesizing, None)
+                     message = 'synthesized wide repo with %d files' % (len(files),)
+                     mc = context.memctx(repo, [pctx.node(), nullid], message,
+                                         files.iterkeys(), filectxfn, ui.username(),
+                                         '%d %d' % util.makedate())
+                     initnode = mc.commit()
+                     hexfn = ui.debugflag and hex or short
+                     ui.status(_('added commit %s with %d files\n')
+                               % (hexfn(initnode), len(files)))
+                 # Synthesize incremental revisions to the repository, adding repo depth.
                  count = int(opts['count'])
                  heads = set(map(repo.changelog.rev, repo.heads()))
                  for i in xrange(count):
                  lock.release()
                  wlock.release()
+             def renamedirs(dirs, words):
+                 '''Randomly rename the directory names in the per-dir file count dict.'''
+                 # Returns a list of [renamed path, count] pairs, one per entry in dirs
+                 # (a list, not a dict).
+                 # Replacement names are taken from words in order; cycle() starts over
+                 # from the first word once the sequence is exhausted, so names can
+                 # repeat when there are more path components than words.
+                 wordgen = itertools.cycle(words)
+                 # Memo of original path -> renamed path. The empty path maps to itself.
+                 replacements = {'': ''}
+                 def rename(dirpath):
+                     '''Recursively rename the directory and all path prefixes.
+                     The mapping from path to renamed path is stored for all path prefixes
+                     as in dynamic programming, ensuring linear runtime and consistent
+                     renaming regardless of iteration order through the model.
+                     '''
+                     if dirpath in replacements:
+                         return replacements[dirpath]
+                     # Only the parent part is kept; the last component is discarded
+                     # into a local `_`.
+                     head, _ = os.path.split(dirpath)
+                     # Rename the parent first so all paths sharing it get the same
+                     # renamed prefix; an empty parent stays ''.
+                     head = head and rename(head) or ''
+                     # The last component is replaced by the next word from the cycle.
+                     renamed = os.path.join(head, wordgen.next())
+                     replacements[dirpath] = renamed
+                     return renamed
+                 result = []
+                 for dirpath, count in dirs.iteritems():
+                     # Leading path separators are stripped before renaming.
+                     result.append([rename(dirpath.lstrip(os.sep)), count])
+                 return result

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages