##// END OF EJS Templates
integrations: restructure code and added better coverage.
integrations: restructure code and added better coverage.

File last commit:

r5088:8f6d1ed6 default
r5123:1d3bc909 default
Show More
test_crawler.py
189 lines | 5.0 KiB | text/x-python | PythonLexer
project: added all source files and assets
r1
copyrights: updated for 2023
r5088 # Copyright (C) 2010-2023 RhodeCode GmbH
project: added all source files and assets
r1 #
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License, version 3
# (only), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This program is dual-licensed. If you wish to learn more about the
# RhodeCode Enterprise Edition, including its added features, Support services,
# and proprietary license terms, please see https://rhodecode.com/licenses/
"""
Test for crawling a project for memory usage
This should be runned just as regular script together
with a watch script that will show memory usage.
watch -n1 ./rhodecode/tests/mem_watch
"""
import cookielib
tests: fixed all tests for python3 BIG changes
r5087 import urllib.request
import urllib.parse
import urllib.error
import urllib.request
import urllib.error
import urllib.parse
project: added all source files and assets
r1 import time
import os
import sys
from os.path import join as jn
from os.path import dirname as dn
from sqlalchemy.util import OrderedSet
__here__ = os.path.abspath(__file__)
__root__ = dn(dn(dn(__here__)))
sys.path.append(__root__)
from rhodecode.lib import vcs
from rhodecode.lib.vcs.exceptions import RepositoryError
PASES = 3
HOST = 'http://127.0.0.1'
PORT = 5001
BASE_URI = '%s:%s/' % (HOST, PORT)
if len(sys.argv) == 2:
BASE_URI = sys.argv[1]
if not BASE_URI.endswith('/'):
BASE_URI += '/'
core: use py3 compatible prints
r3057 print('Crawling @ %s' % BASE_URI)
project: added all source files and assets
r1 BASE_URI += '%s'
PROJECT_PATH = jn('/', 'home', 'marcink', 'repos')
PROJECTS = [
#'linux-magx-pbranch',
'CPython',
'rhodecode_tip',
]
cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
python3: fix urllib usage
r4914 o = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
project: added all source files and assets
r1 o.addheaders = [
('User-agent', 'rhodecode-crawler'),
('Accept-Language', 'en - us, en;q = 0.5')
]
python3: fix urllib usage
r4914 urllib.request.install_opener(o)
project: added all source files and assets
r1
def _get_repo(proj):
py3: remove use of pyramid.compat
r4908 if isinstance(proj, str):
project: added all source files and assets
r1 repo = vcs.get_repo(jn(PROJECT_PATH, proj))
proj = proj
else:
repo = proj
proj = repo.name
return repo, proj
def test_changelog_walk(proj, pages=100):
repo, proj = _get_repo(proj)
total_time = 0
for i in range(1, pages):
page = '/'.join((proj, 'changelog',))
python3: fix urllib usage
r4914 full_uri = (BASE_URI % page) + '?' + urllib.parse.urlencode({'page': i})
project: added all source files and assets
r1 s = time.time()
f = o.open(full_uri)
assert f.url == full_uri, 'URL:%s does not match %s' % (f.url, full_uri)
size = len(f.read())
e = time.time() - s
total_time += e
core: use py3 compatible prints
r3057 print('visited %s size:%s req:%s ms' % (full_uri, size, e))
project: added all source files and assets
r1
core: use py3 compatible prints
r3057 print('total_time {}'.format(total_time))
print('average on req {}'.format(total_time / float(pages)))
project: added all source files and assets
r1
def test_commit_walk(proj, limit=None):
repo, proj = _get_repo(proj)
core: use py3 compatible prints
r3057 print('processing', jn(PROJECT_PATH, proj))
project: added all source files and assets
r1 total_time = 0
cnt = 0
for i in repo:
cnt += 1
raw_cs = '/'.join((proj, 'changeset', i.raw_id))
if limit and limit == cnt:
break
full_uri = (BASE_URI % raw_cs)
core: use py3 compatible prints
r3057 print('%s visiting %s\%s' % (cnt, full_uri, i))
project: added all source files and assets
r1 s = time.time()
f = o.open(full_uri)
size = len(f.read())
e = time.time() - s
total_time += e
core: use py3 compatible prints
r3057 print('%s visited %s\%s size:%s req:%s ms' % (cnt, full_uri, i, size, e))
project: added all source files and assets
r1
core: use py3 compatible prints
r3057 print('total_time {}'.format(total_time))
print('average on req {}'.format(total_time / float(cnt)))
project: added all source files and assets
r1
def test_files_walk(proj, limit=100):
repo, proj = _get_repo(proj)
core: use py3 compatible prints
r3057 print('processing {}'.format(jn(PROJECT_PATH, proj)))
project: added all source files and assets
r1 total_time = 0
paths_ = OrderedSet([''])
try:
tip = repo.get_commit('tip')
for topnode, dirs, files in tip.walk('/'):
for dir in dirs:
paths_.add(dir.path)
for f in dir:
paths_.add(f.path)
for f in files:
paths_.add(f.path)
except RepositoryError as e:
pass
cnt = 0
for f in paths_:
cnt += 1
if limit and limit == cnt:
break
file_path = '/'.join((proj, 'files', 'tip', f))
full_uri = (BASE_URI % file_path)
core: use py3 compatible prints
r3057 print('%s visiting %s' % (cnt, full_uri))
project: added all source files and assets
r1 s = time.time()
f = o.open(full_uri)
size = len(f.read())
e = time.time() - s
total_time += e
core: use py3 compatible prints
r3057 print('%s visited OK size:%s req:%s ms' % (cnt, size, e))
project: added all source files and assets
r1
core: use py3 compatible prints
r3057 print('total_time {}'.format(total_time))
print('average on req {}'.format(total_time / float(cnt)))
project: added all source files and assets
r1
if __name__ == '__main__':
for path in PROJECTS:
repo = vcs.get_repo(jn(PROJECT_PATH, path))
for i in range(PASES):
core: use py3 compatible prints
r3057 print('PASS %s/%s' % (i, PASES))
project: added all source files and assets
r1 test_changelog_walk(repo, pages=80)
test_commit_walk(repo, limit=100)
test_files_walk(repo, limit=100)