rhodecode_crawler.py
# -*- coding: utf-8 -*-
"""
    rhodecode.tests.test_crawler
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Test for crawling a project for memory usage.
    This should be run just as a regular script, together
    with a watch script that shows memory usage:

        watch -n1 ./rhodecode/tests/mem_watch

    :created_on: Apr 21, 2010
    :author: marcink
    :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
    :license: GPLv3, see COPYING for more details.
"""
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import cookielib
import urllib
import urllib2
import time
import os
import sys
from os.path import join as jn
from os.path import dirname as dn

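# allow running this script directly from a source checkout: put the
# directory three levels above this file (the source tree root) on
# sys.path before importing rhodecode modules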
__here__ = os.path.abspath(__file__)
__root__ = dn(dn(dn(__here__)))
sys.path.append(__root__)

from rhodecode.lib import vcs
from rhodecode.lib.compat import OrderedSet
from rhodecode.lib.vcs.exceptions import RepositoryError

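# crawler configuration; the base URI of the RhodeCode instance to crawl can
# be overridden by passing it as the single command line argument.  After the
# checks below, BASE_URI ends with a '%s' placeholder that each test fills in
# with the page path to visit.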
PASES = 3
HOST = 'http://127.0.0.1'
PORT = 5000
BASE_URI = '%s:%s/' % (HOST, PORT)

if len(sys.argv) == 2:
    BASE_URI = sys.argv[1]

if not BASE_URI.endswith('/'):
    BASE_URI += '/'

print 'Crawling @ %s' % BASE_URI
BASE_URI += '%s'
PROJECT_PATH = jn('/', 'home', 'marcink', 'hg_repos')
PROJECTS = [
    'linux-magx-pbranch',
    'CPython',
    'rhodecode_tip',
]

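# a single urllib2 opener with a shared cookie jar, so the whole crawl
# behaves like one browser session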
cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
o.addheaders = [
    ('User-agent', 'rhodecode-crawler'),
    ('Accept-Language', 'en - us, en;q = 0.5')
]

urllib2.install_opener(o)
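

# helper used by the tests below: accept either a project name relative to
# PROJECT_PATH or an already opened vcs repository object and return the
# (repo, name) pair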
def _get_repo(proj):
    if isinstance(proj, basestring):
        repo = vcs.get_repo(jn(PROJECT_PATH, proj))
    else:
        repo = proj
        proj = repo.name
    return repo, proj
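

# walk the paged changelog view of a project and time every request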
def test_changelog_walk(proj, pages=100):
    repo, proj = _get_repo(proj)
    total_time = 0
    for i in range(1, pages + 1):
        page = '/'.join((proj, 'changelog',))

        full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page': i})
        s = time.time()
        f = o.open(full_uri)
        size = len(f.read())
        e = time.time() - s
        total_time += e
        print 'visited %s size:%s req:%s s' % (full_uri, size, e)

    print 'total_time', total_time
    print 'average on req', total_time / float(pages)
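

# visit the changeset page of every changeset in the repository, optionally
# stopping after `limit` changesets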
def test_changeset_walk(proj, limit=None):
    repo, proj = _get_repo(proj)
    print 'processing', jn(PROJECT_PATH, proj)
    total_time = 0

    cnt = 0
    for i in repo:
        if limit and cnt == limit:
            break
        cnt += 1
        raw_cs = '/'.join((proj, 'changeset', i.raw_id))

        full_uri = (BASE_URI % raw_cs)
        print '%s visiting %s %s' % (cnt, full_uri, i)
        s = time.time()
        f = o.open(full_uri)
        size = len(f.read())
        e = time.time() - s
        total_time += e
        print '%s visited %s %s size:%s req:%s s' % (cnt, full_uri, i, size, e)

    print 'total_time', total_time
    print 'average on req', total_time / float(cnt)
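

# collect file and directory paths from the tip changeset and visit the
# corresponding files view for each collected path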
def test_files_walk(proj, limit=100):
    repo, proj = _get_repo(proj)
    print 'processing', jn(PROJECT_PATH, proj)
    total_time = 0

    paths_ = OrderedSet([''])
    try:
        tip = repo.get_changeset('tip')
        for topnode, dirs, files in tip.walk('/'):
            for dir in dirs:
                paths_.add(dir.path)
                for f in dir:
                    paths_.add(f.path)
            for f in files:
                paths_.add(f.path)
    except RepositoryError:
        pass

    cnt = 0
    for f in paths_:
        if limit and cnt == limit:
            break
        cnt += 1
        file_path = '/'.join((proj, 'files', 'tip', f))
        full_uri = (BASE_URI % file_path)
        print '%s visiting %s' % (cnt, full_uri)
        s = time.time()
        f = o.open(full_uri)
        size = len(f.read())
        e = time.time() - s
        total_time += e
        print '%s visited OK size:%s req:%s s' % (cnt, size, e)

    print 'total_time', total_time
    print 'average on req', total_time / float(cnt)
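

# crawl every configured project PASES times; a separate memory watcher
# (see the module docstring) is expected to observe the server while this runs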
if __name__ == '__main__':
    for path in PROJECTS:
        repo = vcs.get_repo(jn(PROJECT_PATH, path))
        for i in range(PASES):
            print 'PASS %s/%s' % (i + 1, PASES)
            test_changelog_walk(repo, pages=80)
            test_changeset_walk(repo, limit=100)
            test_files_walk(repo, limit=100)