##// END OF EJS Templates
Starting RhodeCode 1.4 Branch
Starting RhodeCode 1.4 Branch

File last commit:

r2213:884fbc54 beta
r2214:2fd474e6 codereview
Show More
rhodecode_crawler.py
159 lines | 4.3 KiB | text/x-python | PythonLexer
/ rhodecode / tests / rhodecode_crawler.py
renamed crawler to not be runned at tests, bug found by slestak.
r1374 # -*- coding: utf-8 -*-
"""
rhodecode.tests.test_crawer
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Test for crawling a project for memory usage
This should be run as a regular script, together
with a watch script that will show memory usage.
auto white-space removal
r1818
renamed crawler to not be runned at tests, bug found by slestak.
r1374 watch -n1 ./rhodecode/tests/mem_watch
:created_on: Apr 21, 2010
:author: marcink
2012 copyrights
r1824 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
renamed crawler to not be runned at tests, bug found by slestak.
r1374 :license: GPLv3, see COPYING for more details.
"""
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import cookielib
import urllib
import urllib2
import time
small improvements in rhodecode_crawler
r2211 import os
import sys
from os.path import join as jn
from os.path import dirname as dn
renamed crawler to not be runned at tests, bug found by slestak.
r1374
small improvements in rhodecode_crawler
r2211 __here__ = os.path.abspath(__file__)
__root__ = dn(dn(dn(__here__)))
sys.path.append(__root__)
Added VCS into rhodecode core for faster and easier deployments of new versions
r2007 from rhodecode.lib import vcs
renamed crawler to not be runned at tests, bug found by slestak.
r1374
small improvements in rhodecode_crawler
# URL template of the server being crawled; '%s' is filled with the page path.
BASE_URI = 'http://127.0.0.1:5001/%s'

# Local directory that holds the repositories listed in PROJECTS.
PROJECT_PATH = jn('/', 'home', 'marcink', 'hg_repos')

# Repository names to crawl; each one is joined onto PROJECT_PATH and is
# also used as the first segment of every requested URL.
PROJECTS = [
    'linux-magx-pbranch',
    'CPython',
    'rhodecode_tip',
]
renamed crawler to not be runned at tests, bug found by slestak.
r1374
# Cookie-aware opener shared by every request the crawler makes.
cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
o.addheaders = [
    ('User-agent', 'rhodecode-crawler'),
    # The header grammar allows no whitespace inside a language tag or
    # around "q=", so the value must not be written as 'en - us, en;q = 0.5'.
    ('Accept-Language', 'en-us, en;q=0.5'),
]

# Make the same opener the default for plain urllib2.urlopen() calls.
urllib2.install_opener(o)
small improvements in rhodecode_crawler
def test_changelog_walk(proj, pages=100):
    """
    Request consecutive changelog pages of a project and print timings.

    :param proj: repository name, used as the first URL path segment
    :param pages: exclusive upper bound of the page number; pages
        ``1 .. pages - 1`` are requested
    """
    total_time = 0
    requests = 0
    # The page path does not depend on the page number, so build it once.
    page = '/'.join((proj, 'changelog',))

    for i in range(1, pages):
        full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page': i})
        s = time.time()
        f = o.open(full_uri)
        try:
            size = len(f.read())
        finally:
            # Release the connection even when reading fails.
            f.close()
        e = time.time() - s
        total_time += e
        requests += 1
        # time.time() works in seconds; convert so the "ms" label is true.
        print('visited %s size:%s req:%s ms' % (full_uri, size, e * 1000))

    print('total_time %s' % total_time)
    # Average over the requests actually made (pages - 1, not pages) and
    # avoid dividing by zero when nothing was requested.
    if requests:
        average = total_time / float(requests)
    else:
        average = 0.0
    print('average on req %s' % average)
small improvements in rhodecode_crawler
def test_changeset_walk(proj, limit=None):
    """
    Request the changeset page of each item yielded by the repository
    and print timings.

    :param proj: repository name; joined onto PROJECT_PATH to open the
        repository and used as the first URL path segment
    :param limit: when truthy, iteration stops as soon as the running
        counter reaches this value, so at most ``limit - 1`` pages are
        requested; ``None`` walks the whole repository
    """
    print('processing %s' % jn(PROJECT_PATH, proj))
    total_time = 0
    visited = 0
    repo = vcs.get_repo(jn(PROJECT_PATH, proj))
    cnt = 0
    for i in repo:
        cnt += 1
        if limit and limit == cnt:
            break

        raw_cs = '/'.join((proj, 'changeset', i.raw_id))
        full_uri = (BASE_URI % raw_cs)
        # '\\' keeps the literal backslash the output has always contained
        # without relying on the invalid escape sequence '\%'.
        print('%s visiting %s\\%s' % (cnt, full_uri, i))
        s = time.time()
        f = o.open(full_uri)
        try:
            size = len(f.read())
        finally:
            # Release the connection even when reading fails.
            f.close()
        e = time.time() - s
        total_time += e
        visited += 1
        # time.time() works in seconds; convert so the "ms" label is true.
        print('%s visited %s\\%s size:%s req:%s ms'
              % (cnt, full_uri, i, size, e * 1000))

    print('total_time %s' % total_time)
    # cnt is one too high when the limit ends the loop and is zero for an
    # empty repository, so average over the requests actually made.
    if visited:
        average = total_time / float(visited)
    else:
        average = 0.0
    print('average on req %s' % average)
small improvements in rhodecode_crawler
def test_files_walk(proj, limit=100):
    """
    Request the ``files/tip/<path>`` page for paths collected from the
    repository's ``tip`` changeset and print timings.

    :param proj: repository name; joined onto PROJECT_PATH to open the
        repository and used as the first URL path segment
    :param limit: when truthy, the walk stops as soon as the running
        counter reaches this value, so at most ``limit - 1`` pages are
        requested
    """
    print('processing %s' % jn(PROJECT_PATH, proj))
    total_time = 0
    visited = 0
    repo = vcs.get_repo(jn(PROJECT_PATH, proj))

    from rhodecode.lib.compat import OrderedSet
    from rhodecode.lib.vcs.exceptions import RepositoryError

    # Start with the empty path so the root listing is requested first.
    paths_ = OrderedSet([''])
    try:
        tip = repo.get_changeset('tip')
        for topnode, dirs, files in tip.walk('/'):
            for dir_node in dirs:
                paths_.add(dir_node.path)
                for child in dir_node:
                    paths_.add(child.path)

            for file_node in files:
                paths_.add(file_node.path)
    except RepositoryError:
        # Best effort: keep crawling whatever was collected, but say why
        # the list may be short. sys.exc_info() is used instead of an
        # "except ... as" binding to stay valid on old Python 2 releases.
        print('could not walk tip of %s: %s' % (proj, sys.exc_info()[1]))

    cnt = 0
    for path in paths_:
        cnt += 1
        if limit and limit == cnt:
            break

        file_path = '/'.join((proj, 'files', 'tip', path))
        full_uri = (BASE_URI % file_path)
        print('%s visiting %s' % (cnt, full_uri))
        s = time.time()
        resp = o.open(full_uri)
        try:
            size = len(resp.read())
        finally:
            # Release the connection even when reading fails.
            resp.close()
        e = time.time() - s
        total_time += e
        visited += 1
        # time.time() works in seconds; convert so the "ms" label is true.
        print('%s visited OK size:%s req:%s ms' % (cnt, size, e * 1000))

    print('total_time %s' % total_time)
    # cnt is one too high when the limit ends the loop, so average over
    # the requests actually made and avoid dividing by zero.
    if visited:
        average = total_time / float(visited)
    else:
        average = 0.0
    print('average on req %s' % average)
small improvements in rhodecode_crawler
if __name__ == '__main__':
    # Run the three walks for every configured project, pausing two
    # seconds between walks.
    for p in PROJECTS:
        test_changelog_walk(p, 40)
        time.sleep(2)
        test_changeset_walk(p, limit=100)
        time.sleep(2)
        test_files_walk(p, 100)