rhodecode-enterprise-ce Files · rhodecode/tests/scripts/test_crawler.py

core: avoid using rhodecode.test packages inside main packages as tests are removed during build which can cause some problems in some edge case calls

super-admin - - Load All Authors

File last commit:

r5608:6d33e504 default


                r5618:bdbdb63f

default

Download file

             test_crawler.py
        
                    188 lines
            
             | 5.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / rhodecode / tests / scripts / test_crawler.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        super-admin
    
core: updated copyright to 2024

              r5608
            
      # Copyright (C) 2010-2024 RhodeCode GmbH

        marcink
    
project: added all source files and assets

              r1
            
      #

      # This program is free software: you can redistribute it and/or modify

      # it under the terms of the GNU Affero General Public License, version 3

      # (only), as published by the Free Software Foundation.

      #

      # This program is distributed in the hope that it will be useful,

      # but WITHOUT ANY WARRANTY; without even the implied warranty of

      # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

      # GNU General Public License for more details.

      #

      # You should have received a copy of the GNU Affero General Public License

      # along with this program.  If not, see <http://www.gnu.org/licenses/>.

      #

      # This program is dual-licensed. If you wish to learn more about the

      # RhodeCode Enterprise Edition, including its added features, Support services,

      # and proprietary license terms, please see https://rhodecode.com/licenses/

      """

      Test for crawling a project for memory usage

      This should be run as a regular script, together

      with a watch script that will show memory usage.

      watch -n1 ./rhodecode/tests/mem_watch

      """

      try:
          import cookielib
      except ImportError:
          # Python 3 renamed the module; alias it so that existing
          # ``cookielib.`` references keep working unchanged.
          import http.cookiejar as cookielib

      import urllib.request
      import urllib.parse
      import urllib.error
      import urllib.request
      import urllib.error
      import urllib.parse
      import time
      import os
      import sys
      from os.path import join as jn
      from os.path import dirname as dn

      from sqlalchemy.util import OrderedSet

      __here__ = os.path.abspath(__file__)

      __root__ = dn(dn(dn(__here__)))

      sys.path.append(__root__)

      from rhodecode.lib import vcs

      from rhodecode.lib.vcs.exceptions import RepositoryError

      # Number of times the whole crawl is repeated for every project.
      PASES = 3

      # Default address of the instance to crawl.
      HOST = 'http://127.0.0.1'
      PORT = 5001
      BASE_URI = '%s:%s/' % (HOST, PORT)

      # A single command line argument overrides the default address.
      if len(sys.argv) == 2:
          BASE_URI = sys.argv[1]

      if not BASE_URI.endswith('/'):
          BASE_URI += '/'

      print('Crawling @ %s' % BASE_URI)

      # From here on BASE_URI is a %-format template (``BASE_URI % path``).
      # Literal percent signs coming from a user supplied URL (for example
      # percent-encoded characters) must be doubled, otherwise the later
      # formatting raises or mangles the address.
      BASE_URI = BASE_URI.replace('%', '%%') + '%s'

      # Directory holding the repositories, and the repository names to crawl.
      PROJECT_PATH = jn('/', 'home', 'marcink', 'repos')
      PROJECTS = [
          #'linux-magx-pbranch',
          'CPython',
          'rhodecode_tip',
      ]

      # Cookie jar tied to a file name under /tmp; nothing here loads or saves it.
      cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')

      # Opener shared by all crawl functions; cookies are kept in ``cj``.
      o = urllib.request.build_opener(
          urllib.request.HTTPCookieProcessor(cj),
      )
      o.addheaders = [
          ('User-agent', 'rhodecode-crawler'),
          ('Accept-Language', 'en - us, en;q = 0.5'),
      ]

      # Register the opener as the default one used by urllib.request.urlopen().
      urllib.request.install_opener(o)

        marcink
    
project: added all source files and assets

              r1
            
      def _get_repo(proj):

        super-admin
    
py3: remove use of pyramid.compat

              r4908
            
          if isinstance(proj, str):

        marcink
    
project: added all source files and assets

              r1
            
              repo = vcs.get_repo(jn(PROJECT_PATH, proj))

              proj = proj

          else:

              repo = proj

              proj = repo.name

          return repo, proj

      def test_changelog_walk(proj, pages=100):
          """
          Request consecutive changelog pages of a project and print timings.

          Pages ``1 .. pages - 1`` are fetched through the module level opener
          ``o``. Every response is read completely so its size can be reported.

          :param proj: project name or repository object, resolved by ``_get_repo``
          :param pages: exclusive upper bound of the page numbers requested
          :raises AssertionError: when the final response URL differs from the
              requested one
          """
          repo, proj = _get_repo(proj)

          total_time = 0
          requests_made = 0
          # the path is identical for every page, only the query string differs
          page = '/'.join((proj, 'changelog',))

          for i in range(1, pages):
              full_uri = (BASE_URI % page) + '?' + urllib.parse.urlencode({'page': i})
              s = time.time()
              # the context manager closes the response even if the check fails
              with o.open(full_uri) as f:
                  assert f.url == full_uri, 'URL:%s does not match %s' % (f.url, full_uri)
                  size = len(f.read())
              e = time.time() - s
              total_time += e
              requests_made += 1
              # NOTE: ``e`` comes from time.time() and is therefore in seconds,
              # despite the "ms" label in the message.
              print('visited %s size:%s req:%s ms' % (full_uri, size, e))

          print('total_time {}'.format(total_time))
          # average over the requests really sent (pages - 1, not pages);
          # ``or 1`` avoids a division by zero when nothing was requested
          print('average on req {}'.format(total_time / float(requests_made or 1)))

        marcink
    
project: added all source files and assets

              r1
            
      def test_commit_walk(proj, limit=None):
          """
          Request the changeset page of each commit of a project and print timings.

          Commits are taken in the order produced by iterating the repository.
          Iteration stops as soon as the running commit counter equals ``limit``,
          so at most ``limit - 1`` pages are requested.

          :param proj: project name or repository object, resolved by ``_get_repo``
          :param limit: counter value at which the walk stops; a falsy value
              disables the limit
          """
          repo, proj = _get_repo(proj)

          print('processing', jn(PROJECT_PATH, proj))
          total_time = 0
          visited = 0

          for cnt, commit in enumerate(repo, 1):
              if limit and limit == cnt:
                  break

              raw_cs = '/'.join((proj, 'changeset', commit.raw_id))
              full_uri = (BASE_URI % raw_cs)
              # '\\' spells the literal backslash explicitly; the previous '\%'
              # was an invalid escape sequence producing the same text
              print('%s visiting %s\\%s' % (cnt, full_uri, commit))
              s = time.time()
              with o.open(full_uri) as f:
                  size = len(f.read())
              e = time.time() - s
              total_time += e
              visited += 1
              # NOTE: ``e`` is in seconds despite the "ms" label in the message.
              print('%s visited %s\\%s size:%s req:%s ms' % (cnt, full_uri, commit, size, e))

          print('total_time {}'.format(total_time))
          # average over the pages really requested; the commit that triggered
          # the limit is not counted and an empty repository no longer divides
          # by zero
          print('average on req {}'.format(total_time / float(visited or 1)))

        marcink
    
project: added all source files and assets

              r1
            
      def test_files_walk(proj, limit=100):
          """
          Request the ``files`` page of paths found in the ``tip`` commit.

          The path list always starts with ``''`` (the repository root) and is
          extended with the paths collected while walking the ``tip`` commit.
          The walk stops as soon as the running path counter equals ``limit``,
          so at most ``limit - 1`` pages are requested.

          :param proj: project name or repository object, resolved by ``_get_repo``
          :param limit: counter value at which the walk stops; a falsy value
              disables the limit
          """
          repo, proj = _get_repo(proj)

          print('processing {}'.format(jn(PROJECT_PATH, proj)))
          total_time = 0

          paths_ = OrderedSet([''])
          try:
              tip = repo.get_commit('tip')
              for topnode, dirs, files in tip.walk('/'):
                  for dir_node in dirs:
                      paths_.add(dir_node.path)
                      # also record the direct children of every directory
                      for child in dir_node:
                          paths_.add(child.path)

                  for file_node in files:
                      paths_.add(file_node.path)
          except RepositoryError as exc:
              # best effort: keep crawling whatever was collected so far, but
              # do not hide the failure
              print('could not collect paths from tip: {}'.format(exc))

          visited = 0
          for cnt, path in enumerate(paths_, 1):
              if limit and limit == cnt:
                  break

              file_path = '/'.join((proj, 'files', 'tip', path))
              full_uri = (BASE_URI % file_path)
              print('%s visiting %s' % (cnt, full_uri))
              s = time.time()
              with o.open(full_uri) as response:
                  size = len(response.read())
              e = time.time() - s
              total_time += e
              visited += 1
              # NOTE: ``e`` is in seconds despite the "ms" label in the message.
              print('%s visited OK size:%s req:%s ms' % (cnt, size, e))

          print('total_time {}'.format(total_time))
          # average over the pages really requested, not over the counter value
          # at which the loop stopped
          print('average on req {}'.format(total_time / float(visited or 1)))

        marcink
    
project: added all source files and assets

              r1
            
      if __name__ == '__main__':
          # Crawl every configured project PASES times; the repository object is
          # opened once per project and reused by all three walks.
          for path in PROJECTS:
              repo = vcs.get_repo(jn(PROJECT_PATH, path))
              # count passes from 1 so the progress line reads 1/N .. N/N
              for i in range(1, PASES + 1):
                  print('PASS %s/%s' % (i, PASES))
                  test_changelog_walk(repo, pages=80)
                  test_commit_walk(repo, limit=100)
                  test_files_walk(repo, limit=100)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

super-admin core: updated copyright to 2024	r5608	# Copyright (C) 2010-2024 RhodeCode GmbH
marcink project: added all source files and assets	r1	#
		# This program is free software: you can redistribute it and/or modify
		# it under the terms of the GNU Affero General Public License, version 3
		# (only), as published by the Free Software Foundation.
		#
		# This program is distributed in the hope that it will be useful,
		# but WITHOUT ANY WARRANTY; without even the implied warranty of
		# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		# GNU General Public License for more details.
		#
		# You should have received a copy of the GNU Affero General Public License
		# along with this program. If not, see <http://www.gnu.org/licenses/>.
		#
		# This program is dual-licensed. If you wish to learn more about the
		# RhodeCode Enterprise Edition, including its added features, Support services,
		# and proprietary license terms, please see https://rhodecode.com/licenses/

		"""
		Test for crawling a project for memory usage
		This should be run as a regular script, together
		with a watch script that will show memory usage.

		watch -n1 ./rhodecode/tests/mem_watch
		"""


		import cookielib
super-admin tests: fixed all tests for python3 BIG changes	r5087	import urllib.request
		import urllib.parse
		import urllib.error
		import urllib.request
		import urllib.error
		import urllib.parse
marcink project: added all source files and assets	r1	import time
		import os
		import sys
		from os.path import join as jn
		from os.path import dirname as dn
		from sqlalchemy.util import OrderedSet

		__here__ = os.path.abspath(__file__)
		__root__ = dn(dn(dn(__here__)))
		sys.path.append(__root__)

		from rhodecode.lib import vcs
		from rhodecode.lib.vcs.exceptions import RepositoryError

		PASES = 3
		HOST = 'http://127.0.0.1'
		PORT = 5001
		BASE_URI = '%s:%s/' % (HOST, PORT)

		if len(sys.argv) == 2:
		BASE_URI = sys.argv[1]

		if not BASE_URI.endswith('/'):
		BASE_URI += '/'

marcink core: use py3 compatible prints	r3057	print('Crawling @ %s' % BASE_URI)
marcink project: added all source files and assets	r1	BASE_URI += '%s'
		PROJECT_PATH = jn('/', 'home', 'marcink', 'repos')
		PROJECTS = [
		#'linux-magx-pbranch',
		'CPython',
		'rhodecode_tip',
		]


		cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
super-admin python3: fix urllib usage	r4914	o = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
marcink project: added all source files and assets	r1	o.addheaders = [
		('User-agent', 'rhodecode-crawler'),
		('Accept-Language', 'en - us, en;q = 0.5')
		]

super-admin python3: fix urllib usage	r4914	urllib.request.install_opener(o)
marcink project: added all source files and assets	r1

		def _get_repo(proj):
super-admin py3: remove use of pyramid.compat	r4908	if isinstance(proj, str):
marcink project: added all source files and assets	r1	repo = vcs.get_repo(jn(PROJECT_PATH, proj))
		proj = proj
		else:
		repo = proj
		proj = repo.name

		return repo, proj


		def test_changelog_walk(proj, pages=100):
		repo, proj = _get_repo(proj)

		total_time = 0
		for i in range(1, pages):

		page = '/'.join((proj, 'changelog',))

super-admin python3: fix urllib usage	r4914	full_uri = (BASE_URI % page) + '?' + urllib.parse.urlencode({'page': i})
marcink project: added all source files and assets	r1	s = time.time()
		f = o.open(full_uri)

		assert f.url == full_uri, 'URL:%s does not match %s' % (f.url, full_uri)

		size = len(f.read())
		e = time.time() - s
		total_time += e
marcink core: use py3 compatible prints	r3057	print('visited %s size:%s req:%s ms' % (full_uri, size, e))
marcink project: added all source files and assets	r1
marcink core: use py3 compatible prints	r3057	print('total_time {}'.format(total_time))
		print('average on req {}'.format(total_time / float(pages)))
marcink project: added all source files and assets	r1

		def test_commit_walk(proj, limit=None):
		repo, proj = _get_repo(proj)

marcink core: use py3 compatible prints	r3057	print('processing', jn(PROJECT_PATH, proj))
marcink project: added all source files and assets	r1	total_time = 0

		cnt = 0
		for i in repo:
		cnt += 1
		raw_cs = '/'.join((proj, 'changeset', i.raw_id))
		if limit and limit == cnt:
		break

		full_uri = (BASE_URI % raw_cs)
marcink core: use py3 compatible prints	r3057	print('%s visiting %s\%s' % (cnt, full_uri, i))
marcink project: added all source files and assets	r1	s = time.time()
		f = o.open(full_uri)
		size = len(f.read())
		e = time.time() - s
		total_time += e
marcink core: use py3 compatible prints	r3057	print('%s visited %s\%s size:%s req:%s ms' % (cnt, full_uri, i, size, e))
marcink project: added all source files and assets	r1
marcink core: use py3 compatible prints	r3057	print('total_time {}'.format(total_time))
		print('average on req {}'.format(total_time / float(cnt)))
marcink project: added all source files and assets	r1

		def test_files_walk(proj, limit=100):
		repo, proj = _get_repo(proj)

marcink core: use py3 compatible prints	r3057	print('processing {}'.format(jn(PROJECT_PATH, proj)))
marcink project: added all source files and assets	r1	total_time = 0

		paths_ = OrderedSet([''])
		try:
		tip = repo.get_commit('tip')
		for topnode, dirs, files in tip.walk('/'):

		for dir in dirs:
		paths_.add(dir.path)
		for f in dir:
		paths_.add(f.path)

		for f in files:
		paths_.add(f.path)

		except RepositoryError as e:
		pass

		cnt = 0
		for f in paths_:
		cnt += 1
		if limit and limit == cnt:
		break

		file_path = '/'.join((proj, 'files', 'tip', f))
		full_uri = (BASE_URI % file_path)
marcink core: use py3 compatible prints	r3057	print('%s visiting %s' % (cnt, full_uri))
marcink project: added all source files and assets	r1	s = time.time()
		f = o.open(full_uri)
		size = len(f.read())
		e = time.time() - s
		total_time += e
marcink core: use py3 compatible prints	r3057	print('%s visited OK size:%s req:%s ms' % (cnt, size, e))
marcink project: added all source files and assets	r1
marcink core: use py3 compatible prints	r3057	print('total_time {}'.format(total_time))
		print('average on req {}'.format(total_time / float(cnt)))
marcink project: added all source files and assets	r1
		if __name__ == '__main__':
		for path in PROJECTS:
		repo = vcs.get_repo(jn(PROJECT_PATH, path))
		for i in range(PASES):
marcink core: use py3 compatible prints	r3057	print('PASS %s/%s' % (i, PASES))
marcink project: added all source files and assets	r1	test_changelog_walk(repo, pages=80)
		test_commit_walk(repo, limit=100)
		test_files_walk(repo, limit=100)