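"""Indexing support for Mercurial's git extension.

This module builds and maintains a sqlite cache (``.hg/cache/git-commits.sqlite``)
that maps a pygit2 repository's commit graph into the structures hg expects:
a numbered changelog with parent pointers, head caches, and per-commit
changed-file records that are later turned into filelogs.
"""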
from __future__ import absolute_import

import collections
import os
import sqlite3

from mercurial.i18n import _
from mercurial import (
    encoding,
    error,
    node as nodemod,
    pycompat,
)

from . import gitutil

pygit2 = gitutil.get_pygit2()

_CURRENT_SCHEMA_VERSION = 1
_SCHEMA = (
    """
CREATE TABLE refs (
  -- node and name are unique together. There may be more than one name for
  -- a given node, and there may be no name at all for a given node (in the
  -- case of an anonymous hg head).
  node TEXT NOT NULL,
  name TEXT
);

-- The "possible heads" of the repository, which we use to figure out
-- if we need to re-walk the changelog.
CREATE TABLE possible_heads (
  node TEXT NOT NULL
);

-- The topological heads of the changelog, which hg depends on.
CREATE TABLE heads (
  node TEXT NOT NULL
);

-- A total ordering of the changelog
CREATE TABLE changelog (
  rev INTEGER NOT NULL PRIMARY KEY,
  node TEXT NOT NULL,
  p1 TEXT,
  p2 TEXT
);

CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);

-- Changed files for each commit, which lets us dynamically build
-- filelogs.
CREATE TABLE changedfiles (
  node TEXT NOT NULL,
  filename TEXT NOT NULL,
  -- 40 zeroes for deletions
  filenode TEXT NOT NULL,
  -- to handle filelog parentage:
  p1node TEXT,
  p1filenode TEXT,
  p2node TEXT,
  p2filenode TEXT
);

CREATE INDEX changedfiles_nodes_idx
  ON changedfiles(node);

PRAGMA user_version=%d
"""
    % _CURRENT_SCHEMA_VERSION
)
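# How the tables above are used in this module:
#  * changelog gives every git commit a dense rev number (its position in
#    the history walk below) plus its p1/p2 parents.
#  * changedfiles rows are inserted with NULL parent columns during indexing
#    and filled in on demand by fill_in_filelog(), since computing filelog
#    parentage eagerly proved very slow.
#  * possible_heads remembers the ref targets seen on the last index run so
#    an unchanged repository can skip the full history walk; heads holds the
#    subset with no children, i.e. the topological heads.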


def _createdb(path):
    # print('open db', path)
    # import traceback
    # traceback.print_stack()
    db = sqlite3.connect(encoding.strfromlocal(path))
    db.text_factory = bytes
    res = db.execute('PRAGMA user_version').fetchone()[0]
    # New database.
    if res == 0:
        for statement in _SCHEMA.split(';'):
            db.execute(statement.strip())
        db.commit()
    elif res == _CURRENT_SCHEMA_VERSION:
        pass
    else:
        raise error.Abort(_(b'sqlite database has unrecognized version'))
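    # WAL journal mode allows readers to keep using the cache while the
    # indexer writes to it.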
    db.execute('PRAGMA journal_mode=WAL')
    return db
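# The walk order used throughout this module: topological with commit time
# as a tie-breaker, reversed so the oldest commit comes out first. Left
# empty when pygit2 isn't available.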
_OUR_ORDER = ()
if pygit2:
    _OUR_ORDER = (
        pygit2.GIT_SORT_TOPOLOGICAL
        | pygit2.GIT_SORT_TIME
        | pygit2.GIT_SORT_REVERSE
    )

_DIFF_FLAGS = 1 << 21  # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2


def _find_nearest_ancestor_introducing_node(
    db, gitrepo, file_path, walk_start, filenode
):
    """Find the nearest ancestor that introduces a file node.

    Args:
      db: a handle to our sqlite database.
      gitrepo: A pygit2.Repository instance.
      file_path: the path of a file in the repo
      walk_start: a pygit2.Oid that is a commit where we should start walking
                  for our nearest ancestor.
      filenode: the hexlified git blob id of the file we're looking for.

    Returns:
      A hexlified SHA that is the commit ID of the nearest ancestor that
      introduces that file node.
    """
    assert isinstance(file_path, str), 'file_path must be str, got %r' % type(
        file_path
    )
    assert isinstance(filenode, str), 'filenode must be str, got %r' % type(
        filenode
    )
    parent_options = {
        row[0].decode('ascii')
        for row in db.execute(
            'SELECT node FROM changedfiles '
            'WHERE filename = ? AND filenode = ?',
            (file_path, filenode),
        )
    }
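    # Walk the ancestors of walk_start in our usual order and return the
    # first commit that the changedfiles table records as touching this
    # filename with this filenode.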
    inner_walker = gitrepo.walk(walk_start, _OUR_ORDER)
    for w in inner_walker:
        if w.id.hex in parent_options:
            return w.id.hex
    raise error.ProgrammingError(
        'Unable to find introducing commit for %s node %s from %s'
        % (file_path, filenode, walk_start)
    )


def fill_in_filelog(gitrepo, db, startcommit, path, startfilenode):
    """Given a starting commit and path, fill in a filelog's parent pointers.

    Args:
      gitrepo: a pygit2.Repository
      db: a handle to our sqlite database
      startcommit: a hexlified node id for the commit to start at
      path: the path of the file whose parent pointers we should fill in.
      startfilenode: the hexlified node id of the file at startcommit

    TODO: make startfilenode optional
    """
    assert isinstance(
        startcommit, str
    ), 'startcommit must be str, got %r' % type(startcommit)
    assert isinstance(
        startfilenode, str
    ), 'startfilenode must be str, got %r' % type(startfilenode)
    visit = collections.deque([(startcommit, startfilenode)])
    while visit:
        cnode, filenode = visit.popleft()
        commit = gitrepo[cnode]
        parents = []
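        # For each git parent, resolve this path in the parent's tree one
        # component at a time; if the file exists there, find the ancestor
        # commit that introduced that exact blob so it can serve as the
        # filelog parent.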
        for parent in commit.parents:
            t = parent.tree
            for comp in path.split('/'):
                try:
                    t = gitrepo[t[comp].id]
                except KeyError:
                    break
            else:
                introducer = _find_nearest_ancestor_introducing_node(
                    db, gitrepo, path, parent.id, t.id.hex
                )
                parents.append((introducer, t.id.hex))
        p1node = p1fnode = p2node = p2fnode = gitutil.nullgit
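        # Keep walking (breadth-first, via the visit queue) into any parent
        # whose own filelog parents haven't been computed yet, i.e. whose
        # changedfiles row still has a NULL p1node.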
        for par, parfnode in parents:
            found = int(
                db.execute(
                    'SELECT COUNT(*) FROM changedfiles WHERE '
                    'node = ? AND filename = ? AND filenode = ? AND '
                    'p1node NOT NULL',
                    (par, path, parfnode),
                ).fetchone()[0]
            )
            if found == 0:
                assert par is not None
                visit.append((par, parfnode))
        if parents:
            p1node, p1fnode = parents[0]
        if len(parents) == 2:
            p2node, p2fnode = parents[1]
        if len(parents) > 2:
            raise error.ProgrammingError(
                b"git support can't handle octopus merges"
            )
        db.execute(
            'UPDATE changedfiles SET '
            'p1node = ?, p1filenode = ?, p2node = ?, p2filenode = ? '
            'WHERE node = ? AND filename = ? AND filenode = ?',
            (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode),
        )
    db.commit()


def _index_repo(gitrepo, db, progress_factory=lambda *args, **kwargs: None):
    # Identify all references so we can tell the walker to visit all of them.
    all_refs = gitrepo.listall_references()
    possible_heads = set()
    prog = progress_factory(b'refs')
    for pos, ref in enumerate(all_refs):
        if prog is not None:
            prog.update(pos)
        if not (
            ref.startswith('refs/heads/')  # local branch
            or ref.startswith('refs/tags/')  # tag
            or ref.startswith('refs/remotes/')  # remote branch
            or ref.startswith('refs/hg/')  # from this extension
        ):
            continue
        try:
            start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT)
        except ValueError:
            # No commit to be found, so we don't care for hg's purposes.
            continue
        possible_heads.add(start.id)
    # Optimization: if the list of heads hasn't changed, don't
    # reindex the changelog. This doesn't matter on small
    # repositories, but on even moderately deep histories (eg cpython)
    # this is a very important performance win.
    #
    # TODO: we should figure out how to incrementally index history
    # (preferably by detecting rewinds!) so that we don't have to do a
    # full changelog walk every time a new commit is created.
    cache_heads = {
        x[0].decode('ascii')
        for x in db.execute('SELECT node FROM possible_heads')
    }
    walker = None
    cur_cache_heads = {h.hex for h in possible_heads}
    if cur_cache_heads == cache_heads:
        return
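    # Seed a single walker with every possible head; Walker.push() adds the
    # extra starting points, so one pass visits every commit reachable from
    # any ref exactly once.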
    for start in possible_heads:
        if walker is None:
            walker = gitrepo.walk(start, _OUR_ORDER)
        else:
            walker.push(start)
    # Empty out the existing changelog. Even for large-ish histories
    # we can do the top-level "walk all the commits" dance very
    # quickly as long as we don't need to figure out the changed files
    # list.
    db.execute('DELETE FROM changelog')
    if prog is not None:
        prog.complete()
    prog = progress_factory(b'commits')
    # This walker is sure to visit all the revisions in history, but
    # only once.
    for pos, commit in enumerate(walker):
        if prog is not None:
            prog.update(pos)
        p1 = p2 = nodemod.nullhex
        if len(commit.parents) > 2:
            raise error.ProgrammingError(
                (
                    b"git support can't handle octopus merges, "
                    b"found a commit with %d parents :("
                )
                % len(commit.parents)
            )
        if commit.parents:
            p1 = commit.parents[0].id.hex
        if len(commit.parents) == 2:
            p2 = commit.parents[1].id.hex
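        # The walk is topological and oldest-first, so parents are always
        # inserted before their children and the enumeration index can serve
        # as the hg-style revision number.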
        db.execute(
            'INSERT INTO changelog (rev, node, p1, p2) VALUES(?, ?, ?, ?)',
            (pos, commit.id.hex, p1, p2),
        )
        num_changedfiles = db.execute(
            "SELECT COUNT(*) from changedfiles WHERE node = ?",
            (commit.id.hex,),
        ).fetchone()[0]
        if not num_changedfiles:
            files = {}
            # I *think* we only need to check p1 for changed files
            # (and therefore linkrevs), because any node that would
            # actually have this commit as a linkrev would be
            # completely new in this rev.
            p1 = commit.parents[0].id.hex if commit.parents else None
            if p1 is not None:
                patchgen = gitrepo.diff(p1, commit.id.hex, flags=_DIFF_FLAGS)
            else:
                patchgen = commit.tree.diff_to_tree(
                    swap=True, flags=_DIFF_FLAGS
                )
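            # A deletion shows up with the null OID as delta.new_file.id, so
            # deleted files are skipped when building the dict below.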
            new_files = (p.delta.new_file for p in patchgen)
            files = {
                nf.path: nf.id.hex
                for nf in new_files
                if nf.id.raw != nodemod.nullid
            }
            for p, n in files.items():
                # We intentionally set NULLs for any file parentage
                # information so it'll get demand-computed later. We
                # used to do it right here, and it was _very_ slow.
                db.execute(
                    'INSERT INTO changedfiles ('
                    'node, filename, filenode, p1node, p1filenode, p2node, '
                    'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
                    (commit.id.hex, p, n, None, None, None, None),
                )
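    # Rebuild the head caches: every ref target recorded above is a
    # "possible head", and the subset with no children in the freshly
    # rebuilt changelog are the topological heads hg exposes.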
    db.execute('DELETE FROM heads')
    db.execute('DELETE FROM possible_heads')
    for hid in possible_heads:
        h = hid.hex
        db.execute('INSERT INTO possible_heads (node) VALUES(?)', (h,))
        haschild = db.execute(
            'SELECT COUNT(*) FROM changelog WHERE p1 = ? OR p2 = ?', (h, h)
        ).fetchone()[0]
        if not haschild:
            db.execute('INSERT INTO heads (node) VALUES(?)', (h,))
    db.commit()
    if prog is not None:
        prog.complete()


def get_index(gitrepo, progress_factory=lambda *args, **kwargs: None):
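    # pygit2's Repository.path points at the .git directory, so '..' is the
    # working copy root and the cache db lands next to hg's other caches in
    # .hg/cache.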
    cachepath = os.path.join(
        pycompat.fsencode(gitrepo.path), b'..', b'.hg', b'cache'
    )
    if not os.path.exists(cachepath):
        os.makedirs(cachepath)
    dbpath = os.path.join(cachepath, b'git-commits.sqlite')
    db = _createdb(dbpath)
    # TODO check against gitrepo heads before doing a full index
    # TODO thread a ui.progress call into this layer
    _index_repo(gitrepo, db, progress_factory)
    return db