upstream/mercurial-mirror Files · contrib/perf-utils/subsetmaker.py

contrib: add a partial-merge tool for sorted lists (such as Python imports)...

contrib: add a partial-merge tool for sorted lists (such as Python imports) This is a pretty naive tool that uses a regular expression for matching lines. It is based on a Google-internal tool that worked in a similar way. For now, the regular expression is hard-coded to attempt to match single-line Python imports. The only commit I've found in the hg core repo where the tool helped was commit 9cd6292abfdf. I think that's because we often use multiple imports per import statement. I think this tool is still a decent first step (especially once the regex is made configurable in the next patch). The merging should ideally use a proper Python parser and do the merge at the AST (or CST?) level, but that's significantly harder, especially if you want to preserve comments and whitespace. It's also less generic. Differential Revision: https://phab.mercurial-scm.org/D12380

marmoute - - Load All Authors

File last commit:

r47506:63a3941d default


                r49874:681b25ea

default

Download file

             subsetmaker.py
        
                    170 lines
            
             | 4.8 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / contrib / perf-utils / subsetmaker.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        marmoute
    
perf-helper: add a small extension with revsets to select repository subset...

              r47500
            
      """revset to select sample of repository

      Hopefully this is useful to create interesting discovery cases.

      """

      import collections

      import random

      from mercurial.i18n import _

      from mercurial import (

          registrar,

          revset,

          revsetlang,

          smartset,

      )

      # decorator factory: every revset function below is declared through
      # `@revsetpredicate(<declaration>)`
      revsetpredicate = registrar.revsetpredicate()

        marmoute
    
perf-util: add a helper revset to use the same spec as the case search script...

              r47506
            
      @revsetpredicate(b'subsetspec("<spec>")')
      def subsetmarkerspec(repo, subset, x):
          """use a shorthand spec as used by search-discovery-case

          Supported formats are:

          - "scratch-count-seed": not scratch(all(), count, "seed")
          - "randomantichain-seed": ::randomantichain(all(), "seed")
          - "rev-REV": "::REV"

          The spec is split on "-", turned into the matching revset expression,
          and the resulting revisions are intersected with ``subset``.

          A ParseError is raised when the argument is missing or when the spec
          does not match one of the formats above.
          """
          # function-scope import: only needed to report malformed specs
          from mercurial import error

          # exactly one argument is required since args[0] is read right below
          args = revsetlang.getargs(
              x, 1, 1, _(b'subsetspec("spec") required an argument')
          )

          spec = revsetlang.getstring(args[0], _(b"spec should be a string"))
          case = spec.split(b'-')
          t = case[0]

          spec_revset = None
          if t == b'scratch' and len(case) >= 3:
              spec_revset = b'not scratch(all(), %s, "%s")' % (case[1], case[2])
          elif t == b'randomantichain' and len(case) >= 2:
              spec_revset = b'::randomantichain(all(), "%s")' % case[1]
          elif t == b'rev' and len(case) >= 2 and case[1].isdigit():
              # `%d` needs a number while the spec part is bytes
              spec_revset = b'::%d' % int(case[1])

          if spec_revset is None:
              # unknown kind, missing part or non numeric revision
              raise error.ParseError(_(b'invalid subset spec: %s') % spec)

          selected = repo.revs(spec_revset)

          return selected & subset

        marmoute
    
perf-helper: add a small extension with revsets to select repository subset...

              r47500
            
      @revsetpredicate(b'scratch(REVS, <count>, [seed])')
      def scratch(repo, subset, x):
          """randomly remove <count> revision from the repository top

          This subset is created by recursively picking changeset starting from the
          heads. It can be summarized using the following algorithm::

              selected = set()
              for i in range(<count>):
                  unselected = repo.revs("not <selected>")
                  candidates = repo.revs("heads(<unselected>)")
                  pick = random.choice(candidates)
                  selected.add(pick)

          The heads are computed from REVS. When a seed is given, a dedicated
          random generator seeded with it is used, otherwise the global `random`
          module is used. Picking stops early if no head is left. The picked
          revisions, intersected with ``subset``, are returned.
          """
          m = _(b"scratch expects revisions, count argument and an optional seed")
          args = revsetlang.getargs(x, 2, 3, m)
          if len(args) == 2:
              x, n = args
              rand = random
          elif len(args) == 3:
              x, n, seed = args
              seed = revsetlang.getinteger(seed, _(b"seed should be a number"))
              rand = random.Random(seed)
          else:
              assert False

          n = revsetlang.getinteger(n, _(b"scratch expects a number"))

          heads = set()
          # number of not-yet-picked children seen for each parent revision
          children_count = collections.defaultdict(lambda: 0)
          parents = repo.changelog._uncheckedparentrevs

          baseset = revset.getset(repo, smartset.fullreposet(repo), x)
          baseset.sort()
          for r in baseset:
              heads.add(r)
              # a revision with a child in the set is not a head
              for p in parents(r):
                  if p >= 0:
                      heads.discard(p)
                      children_count[p] += 1

          for h in heads:
              assert children_count[h] == 0

          selected = set()
          for _step in range(n):
              if not heads:
                  break
              pick = rand.choice(list(heads))
              heads.remove(pick)
              assert pick not in selected
              selected.add(pick)
              # a parent becomes a head once all its counted children are picked
              for p in parents(pick):
                  if p not in children_count:
                      continue
                  children_count[p] -= 1
                  assert children_count[p] >= 0
                  if children_count[p] == 0:
                      assert p not in selected, (pick, p)
                      heads.add(p)

          return smartset.baseset(selected) & subset

        marmoute
    
perf-helper: add a new sampling revset based on anti-chain...

              r47501
            
      @revsetpredicate(b'randomantichain(REVS, [seed])')
      def antichain(repo, subset, x):
          """Pick a random anti-chain in the repository

          An antichain is a set of changesets where there isn't any element that
          is either a descendant or an ancestor of any other element in the set.
          In other words, all the elements are independent. It can be summarized
          with the following algorithm::

              selected = set()
              unselected = repo.revs('all()')
              while unselected:
                  pick = random.choice(unselected)
                  selected.add(pick)
                  unselected -= repo.revs('::<pick> + <pick>::')

          Candidates are taken from REVS. When a seed is given, a dedicated
          random generator seeded with it is used, otherwise the global `random`
          module is used. The picked revisions, intersected with ``subset``, are
          returned.
          """
          args = revsetlang.getargs(
              x, 1, 2, _(b"randomantichain expects revisions and an optional seed")
          )
          # getargs guarantees one or two arguments
          if len(args) == 2:
              x, seed = args
              seed = revsetlang.getinteger(seed, _(b"seed should be a number"))
              rand = random.Random(seed)
          else:
              (x,) = args
              rand = random

          selected = set()

          baseset = revset.getset(repo, smartset.fullreposet(repo), x)
          undecided = baseset

          while undecided:
              pick = rand.choice(list(undecided))
              selected.add(pick)
              # drop every candidate related to an already selected revision
              undecided = repo.revs(
                  b'%ld and not (::%ld or %ld::head())', baseset, selected, selected
              )

          return smartset.baseset(selected) & subset

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

marmoute perf-helper: add a small extension with revsets to select repository subset...	r47500	"""revset to select sample of repository

		Hopefully this is useful to create interesting discovery cases.
		"""

		import collections
		import random

		from mercurial.i18n import _

		from mercurial import (
		registrar,
		revset,
		revsetlang,
		smartset,
		)

		# decorator factory: every revset function below is declared through
		# `@revsetpredicate(<declaration>)`
		revsetpredicate = registrar.revsetpredicate()


marmoute perf-util: add an helper revset to use the same spec as the case search script...	r47506	@revsetpredicate(b'subsetspec("<spec>")')
		def subsetmarkerspec(repo, subset, x):
			"""use a shorthand spec as used by search-discovery-case

			Supported formats are:

			- "scratch-count-seed": not scratch(all(), count, "seed")
			- "randomantichain-seed": ::randomantichain(all(), "seed")
			- "rev-REV": "::REV"

			The spec is split on "-", turned into the matching revset expression,
			and the resulting revisions are intersected with ``subset``.

			A ParseError is raised when the argument is missing or when the spec
			does not match one of the formats above.
			"""
			# function-scope import: only needed to report malformed specs
			from mercurial import error

			# exactly one argument is required since args[0] is read right below
			args = revsetlang.getargs(
				x, 1, 1, _(b'subsetspec("spec") required an argument')
			)

			spec = revsetlang.getstring(args[0], _(b"spec should be a string"))
			case = spec.split(b'-')
			t = case[0]

			spec_revset = None
			if t == b'scratch' and len(case) >= 3:
				spec_revset = b'not scratch(all(), %s, "%s")' % (case[1], case[2])
			elif t == b'randomantichain' and len(case) >= 2:
				spec_revset = b'::randomantichain(all(), "%s")' % case[1]
			elif t == b'rev' and len(case) >= 2 and case[1].isdigit():
				# `%d` needs a number while the spec part is bytes
				spec_revset = b'::%d' % int(case[1])

			if spec_revset is None:
				# unknown kind, missing part or non numeric revision
				raise error.ParseError(_(b'invalid subset spec: %s') % spec)

			selected = repo.revs(spec_revset)

			return selected & subset


marmoute perf-helper: add a small extension with revsets to select repository subset...	r47500	@revsetpredicate(b'scratch(REVS, <count>, [seed])')
		def scratch(repo, subset, x):
			"""randomly remove <count> revision from the repository top

			This subset is created by recursively picking changeset starting from the
			heads. It can be summarized using the following algorithm::

				selected = set()
				for i in range(<count>):
					unselected = repo.revs("not <selected>")
					candidates = repo.revs("heads(<unselected>)")
					pick = random.choice(candidates)
					selected.add(pick)

			The heads are computed from REVS. When a seed is given, a dedicated
			random generator seeded with it is used, otherwise the global `random`
			module is used. Picking stops early if no head is left. The picked
			revisions, intersected with ``subset``, are returned.
			"""
			m = _(b"scratch expects revisions, count argument and an optional seed")
			args = revsetlang.getargs(x, 2, 3, m)
			if len(args) == 2:
				x, n = args
				rand = random
			elif len(args) == 3:
				x, n, seed = args
				seed = revsetlang.getinteger(seed, _(b"seed should be a number"))
				rand = random.Random(seed)
			else:
				assert False

			n = revsetlang.getinteger(n, _(b"scratch expects a number"))

			heads = set()
			# number of not-yet-picked children seen for each parent revision
			children_count = collections.defaultdict(lambda: 0)
			parents = repo.changelog._uncheckedparentrevs

			baseset = revset.getset(repo, smartset.fullreposet(repo), x)
			baseset.sort()
			for r in baseset:
				heads.add(r)
				# a revision with a child in the set is not a head
				for p in parents(r):
					if p >= 0:
						heads.discard(p)
						children_count[p] += 1

			for h in heads:
				assert children_count[h] == 0

			selected = set()
			for _step in range(n):
				if not heads:
					break
				pick = rand.choice(list(heads))
				heads.remove(pick)
				assert pick not in selected
				selected.add(pick)
				# a parent becomes a head once all its counted children are picked
				for p in parents(pick):
					if p not in children_count:
						continue
					children_count[p] -= 1
					assert children_count[p] >= 0
					if children_count[p] == 0:
						assert p not in selected, (pick, p)
						heads.add(p)

			return smartset.baseset(selected) & subset
marmoute perf-helper: add a new sampling revset based on anti-chain...	r47501

		@revsetpredicate(b'randomantichain(REVS, [seed])')
		def antichain(repo, subset, x):
			"""Pick a random anti-chain in the repository

			An antichain is a set of changesets where there isn't any element that
			is either a descendant or an ancestor of any other element in the set.
			In other words, all the elements are independent. It can be summarized
			with the following algorithm::

				selected = set()
				unselected = repo.revs('all()')
				while unselected:
					pick = random.choice(unselected)
					selected.add(pick)
					unselected -= repo.revs('::<pick> + <pick>::')

			Candidates are taken from REVS. When a seed is given, a dedicated
			random generator seeded with it is used, otherwise the global `random`
			module is used. The picked revisions, intersected with ``subset``, are
			returned.
			"""
			args = revsetlang.getargs(
				x, 1, 2, _(b"randomantichain expects revisions and an optional seed")
			)
			# getargs guarantees one or two arguments
			if len(args) == 2:
				x, seed = args
				seed = revsetlang.getinteger(seed, _(b"seed should be a number"))
				rand = random.Random(seed)
			else:
				(x,) = args
				rand = random

			selected = set()

			baseset = revset.getset(repo, smartset.fullreposet(repo), x)
			undecided = baseset

			while undecided:
				pick = rand.choice(list(undecided))
				selected.add(pick)
				# drop every candidate related to an already selected revision
				undecided = repo.revs(
					b'%ld and not (::%ld or %ld::head())', baseset, selected, selected
				)

			return smartset.baseset(selected) & subset