Show More
@@ -1,259 +1,281 | |||
|
1 | 1 | #!/usr/bin/env python3 |
|
2 | 2 | # |
|
3 | 3 | # generate-branchy-bundle - generate a branch for a "large" branchy repository |
|
4 | 4 | # |
|
5 | 5 | # Copyright 2018 Octobus, contact@octobus.net |
|
6 | 6 | # |
|
7 | 7 | # This software may be used and distributed according to the terms of the |
|
8 | 8 | # GNU General Public License version 2 or any later version. |
|
9 | 9 | # |
|
10 | 10 | # This script generates a repository suitable for testing delta computation |
|
11 | 11 | # strategies. |
|
12 | 12 | # |
|
13 | 13 | # The repository updates a single "large" file with many updates. One fixed part |
|
14 | 14 | # of the files always get updated while the rest of the lines get updated over |
|
15 | 15 | # time. This update happens over many topological branches, some getting merged |
|
16 | 16 | # back. |
|
17 | # | |
|
18 | # Running with `chg` in your path and `CHGHG` set is recommended for speed. | |
|
19 | 17 | |
|
20 | 18 | |
|
21 | 19 | import hashlib |
|
22 | 20 | import os |
|
23 | 21 | import shutil |
|
24 | 22 | import subprocess |
|
25 | 23 | import sys |
|
26 | 24 | import tempfile |
|
27 | 25 | |
|
26 | import mercurial.context | |
|
27 | import mercurial.hg | |
|
28 | import mercurial.ui | |
|
29 | ||
|
# name of the generated bundle file (written under ../cache/ by the main guard)
BUNDLE_NAME = 'big-file-churn.hg'

# constants for generating the repository
NB_CHANGESET = 5000  # total number of changesets to generate
PERIOD_MERGING = 8  # every 8th changeset is a merge (second parent = min head)
PERIOD_BRANCHING = 7  # every 7th changeset branches off an older revision
MOVE_BACK_MIN = 3  # minimum number of revisions to walk back when branching
MOVE_BACK_RANGE = 5  # extra (idx-dependent, modular) walk-back distance

# constants for generating the large file we keep updating
#
# At each revision, the beginning of the file changes,
# and a set of other lines changes too.
FILENAME = 'SPARSE-REVLOG-TEST-FILE'
NB_LINES = 10500  # total number of lines in the generated file
ALWAYS_CHANGE_LINES = 500  # leading lines that are rewritten at every revision
OTHER_CHANGES = 300  # modular period controlling which later lines change
|
45 | 47 | |
|
46 | 48 | |
|
def build_graph():
    """Return the shape of the changeset graph as {rev: (p1, p2)}.

    Revision 0 is the root (both parents None). Every PERIOD_BRANCHING
    revisions the first parent moves back a few revisions (creating a new
    topological branch); every PERIOD_MERGING revisions the oldest head is
    merged back in as a second parent.
    """
    heads = {0}
    graph = {0: (None, None)}
    for rev in range(1, NB_CHANGESET + 1):
        first_parent = rev - 1
        second_parent = None
        if rev % PERIOD_BRANCHING == 0:
            # walk the first parent back to an older revision to branch off
            distance = MOVE_BACK_MIN + (rev % MOVE_BACK_RANGE)
            for _ in range(distance):
                first_parent = graph.get(first_parent, (first_parent,))[0]
        if rev % PERIOD_MERGING == 0:
            # merge back the oldest remaining head
            second_parent = min(heads)
        heads.discard(first_parent)
        heads.discard(second_parent)
        heads.add(rev)
        graph[rev] = (first_parent, second_parent)
    return graph
|
64 | 66 | |
|
65 | 67 | |
|
# changeset graph shape, computed once at import time
GRAPH = build_graph()
|
67 | 69 | |
|
68 | 70 | |
|
def nextcontent(previous_content):
    """Derive a new line of file content from the previous one.

    Returns the hex MD5 digest of *previous_content* as ASCII bytes, so
    successive lines form a deterministic hash chain.
    """
    digest = hashlib.md5(previous_content).hexdigest()
    return digest.encode('ascii')
|
72 | 74 | |
|
73 | 75 | |
|
def filecontent(iteridx, oldcontent):
    """Generate a new file content, one line (bytes) at a time.

    The content is derived from the iteration index and the previous
    content. The first ALWAYS_CHANGE_LINES lines are always regenerated;
    later lines are regenerated only on a modular schedule depending on
    *iteridx*, otherwise the old line is reused.
    """
    # seed of the hash chain: empty on the initial call, otherwise the index
    current = b'' if iteridx == 0 else b"%d" % iteridx

    for lineno in range(NB_LINES):
        if oldcontent is None or lineno <= ALWAYS_CHANGE_LINES:
            regenerate = True
        else:
            regenerate = (lineno - iteridx) % OTHER_CHANGES == 0

        if regenerate:
            yield current + b'\n'
            current = nextcontent(current)
        else:
            yield oldcontent[lineno]
|
97 | 99 | |
|
98 | 100 | |
|
def merge_content(base, left, right):
    """Merge two file contents (against their common base) into a new one.

    Takes the unambiguous update from either side when possible, and
    produces a brand new line whenever a real merge is needed. Similar to
    what a manifest merge would do.

    Fix: the original loop reused the names ``left`` and ``right`` as loop
    variables, shadowing the parameters; distinct names are used instead.
    """
    for old_line, left_line, right_line in zip(base, left, right):
        if old_line == left_line:
            # left side unchanged: take right (equals old when right is
            # unchanged too)
            yield right_line
        elif old_line == right_line:
            # only the left side changed
            yield left_line
        else:
            # both sides changed: synthesize a fresh line
            yield nextcontent(left_line + right_line)
|
114 | 116 | |
|
115 | 117 | |
|
def ancestors(graph, rev):
    """Return the set of ancestors of revision <rev>, including <rev> itself."""
    seen = {rev}
    pending = [rev]
    # simple worklist traversal over the parent tuples
    while pending:
        node = pending.pop()
        for parent in graph[node]:
            if parent is not None and parent not in seen:
                seen.add(parent)
                pending.append(parent)
    return seen
|
130 | 132 | |
|
131 | 133 | |
|
def gca(graph, left, right):
    """Find the greatest common ancestor of <left> and <right>.

    Note that the algorithm is naive and quadratic when run over every
    merge; that is acceptable at the current scale.
    """
    common = ancestors(graph, left) & ancestors(graph, right)
    return max(common)
|
139 | 141 | |
|
140 | 142 | |
|
def make_one_content_fn(idx, base, left, right):
    """Build a function that computes this revision's content on demand.

    The dependencies are kept as references so they are not
    garbage-collected before we use them. Once the content has been
    computed, the references are dropped so the dependencies can be
    collected.
    """

    def content_fn(idx=idx, base=base, left=left, right=right):
        if left is None:
            # root revision: build from scratch
            new = filecontent(idx, None)
        elif base is None:
            # linear revision: derive from the single parent's content
            new = filecontent(idx, left())
        else:
            # merge revision: merge both parents against their base first
            merged = merge_content(base(), left(), right())
            new = filecontent(idx, list(merged))
        return list(new)

    # content_fn captured its own copies as default argument values; drop
    # the enclosing references so they do not pin the dependencies.
    del idx, base, left, right

    value = None
    # keep content_fn reachable through a one-shot container so it can be
    # released right after the single computation
    holder = [content_fn]
    del content_fn

    def final_fn():
        nonlocal value
        if value is None:
            fn = holder.pop()
            value = list(fn())
            del fn
        return value

    return final_fn
|
177 | 179 | |
|
178 | 180 | |
|
def build_content_graph(graph):
    """Produce (lazy) file content for every revision in <graph>.

    The content is generated on demand and cached; pop entries from the
    returned dictionary as you consume them to reduce memory usage.
    """
    content = {}
    for rev, (p1, p2) in graph.items():
        left = content[p1] if p1 is not None else None
        base = right = None
        if p2 is not None:
            right = content[p2]
            # merges need the common base to do a three-way merge
            base = content[gca(graph, p1, p2)]
        content[rev] = make_one_content_fn(rev, base, left, right)
    return content
|
196 | 198 | |
|
197 | 199 | |
|
# on-demand content generator for each revision; entries are popped as consumed
CONTENT = build_content_graph(GRAPH)
|
199 | 201 | |
|
200 | 202 | |
|
def hg(command, *args):
    """Call a Mercurial command with appropriate config and arguments.

    Uses ``chg`` when CHGHG is set in the environment, neutralizes any
    user configuration via an empty HGRCPATH, and pins commit/merge
    options for reproducibility. Returns the subprocess' exit status
    (raises on failure).
    """
    env = os.environ.copy()
    cmd = ['chg' if 'CHGHG' in env else 'hg', '--quiet', command]
    if command == 'commit':
        # reproducible commit metadata
        cmd += ['--date', '0 0', '--user', 'test']
    elif command == 'merge':
        # avoid conflicts by picking the local variant
        cmd += ['--tool', ':merge-local']
    cmd.extend(args)
    env['HGRCPATH'] = ''
    return subprocess.check_call(cmd, env=env)
|
219 | 221 | |
|
220 | 222 | |
|
def write_repo(path):
    """Write the repository content in memory via the Mercurial API."""
    repo = mercurial.hg.repository(
        mercurial.ui.ui.load(),
        path=path.encode('utf-8'),
    )
    # maps our graph indexes to commit nodes; None stands for the null parent
    nodemap = {None: repo.nodeconstants.nullid}
    with repo.lock(), repo.transaction(b'bundle-generation'):
        for idx, (p1, p2) in GRAPH.items():
            if sys.stdout.isatty():
                print("generating commit #%d/%d" % (idx, NB_CHANGESET))

            # early-bind idx so the callback is independent of the loop
            # variable (the original lambda relied on being called within
            # the same iteration)
            def file_fn(repo, memctx, path, _idx=idx):
                return mercurial.context.memfilectx(
                    repo,
                    memctx,
                    path,
                    # pop the value to let it be garbage-collected eventually
                    data=b''.join(CONTENT.pop(_idx)()),
                )

            mc = mercurial.context.memctx(
                repo,
                (nodemap[p1], nodemap[p2]),
                b'commit #%d' % idx if idx else b'initial commit',
                [FILENAME.encode('ascii')],
                file_fn,
                user=b"test",
                date=(0, 0),
            )
            nodemap[idx] = repo.commitctx(mc)
|
252 | ||
|
253 | ||
|
def run(target):
    """Generate the repository in a scratch directory and bundle it.

    Writes the bundle to <target> plus an MD5 checksum to <target>.md5.
    The scratch directory is always removed. Returns 0.
    """
    workdir = tempfile.mkdtemp(prefix='tmp-hg-test-big-file-bundle-')
    try:
        os.chdir(workdir)
        # a large maxchainlen keeps all revisions in one delta chain
        hg(
            'init',
            '--config',
            'format.maxchainlen=%d' % NB_CHANGESET,
        )
        write_repo(workdir)
        hg('bundle', '--all', target, '--config', 'devel.bundle.delta=p1')
        with open(target, 'rb') as bundle:
            digest = hashlib.md5(bundle.read()).hexdigest()
        with open(target + '.md5', 'wb') as md5file:
            md5file.write(digest.encode('ascii') + b'\n')
        if sys.stdout.isatty():
            print('bundle generated at "%s" md5: %s' % (target, digest))

    finally:
        shutil.rmtree(workdir)
    return 0
|
254 | 276 | |
|
255 | 277 | |
|
if __name__ == '__main__':
    # place the bundle next to this script, under ../cache/
    script_dir = os.path.realpath(os.path.dirname(sys.argv[0]))
    bundle_path = os.path.join(script_dir, os.pardir, 'cache', BUNDLE_NAME)
    sys.exit(run(bundle_path))
General Comments 0
You need to be logged in to leave comments.
Login now