##// END OF EJS Templates
small improvements in rhodecode_crawler
marcink -
r2211:c00ab8b2 beta
parent child Browse files
Show More
@@ -32,30 +32,37 b' import cookielib'
32 import urllib
32 import urllib
33 import urllib2
33 import urllib2
34 import time
34 import time
35 import os
36 import sys
37 from os.path import join as jn
38 from os.path import dirname as dn
35
39
36 from os.path import join as jn
40 __here__ = os.path.abspath(__file__)
41 __root__ = dn(dn(dn(__here__)))
42 sys.path.append(__root__)
43
37 from rhodecode.lib import vcs
44 from rhodecode.lib import vcs
38
45
39 BASE_URI = 'http://127.0.0.1:5000/%s'
46 BASE_URI = 'http://127.0.0.1:5001/%s'
40 PROJECT = 'CPython'
41 PROJECT_PATH = jn('/', 'home', 'marcink', 'hg_repos')
47 PROJECT_PATH = jn('/', 'home', 'marcink', 'hg_repos')
48 PROJECTS = ['CPython', 'rhodecode_tip', 'mastergmat']
42
49
43
50
44 cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
51 cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
45 o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
52 o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
46 o.addheaders = [
53 o.addheaders = [
47 ('User-agent', 'rhodecode-crawler'),
54 ('User-agent', 'rhodecode-crawler'),
48 ('Accept-Language', 'en - us, en;q = 0.5')
55 ('Accept-Language', 'en - us, en;q = 0.5')
49 ]
56 ]
50
57
51 urllib2.install_opener(o)
58 urllib2.install_opener(o)
52
59
53
60
54 def test_changelog_walk(pages=100):
61 def test_changelog_walk(proj, pages=100):
55 total_time = 0
62 total_time = 0
56 for i in range(1, pages):
63 for i in range(1, pages):
57
64
58 page = '/'.join((PROJECT, 'changelog',))
65 page = '/'.join((proj, 'changelog',))
59
66
60 full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page':i})
67 full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page':i})
61 s = time.time()
68 s = time.time()
@@ -69,19 +76,20 b' def test_changelog_walk(pages=100):'
69 print 'average on req', total_time / float(pages)
76 print 'average on req', total_time / float(pages)
70
77
71
78
72 def test_changeset_walk(limit=None):
79 def test_changeset_walk(proj, limit=None):
73 print 'processing', jn(PROJECT_PATH, PROJECT)
80 print 'processing', jn(PROJECT_PATH, proj)
74 total_time = 0
81 total_time = 0
75
82
76 repo = vcs.get_repo(jn(PROJECT_PATH, PROJECT))
83 repo = vcs.get_repo(jn(PROJECT_PATH, proj))
77 cnt = 0
84 cnt = 0
78 for i in repo:
85 for i in repo:
79 cnt += 1
86 cnt += 1
80 raw_cs = '/'.join((PROJECT, 'changeset', i.raw_id))
87 raw_cs = '/'.join((proj, 'changeset', i.raw_id))
81 if limit and limit == cnt:
88 if limit and limit == cnt:
82 break
89 break
83
90
84 full_uri = (BASE_URI % raw_cs)
91 full_uri = (BASE_URI % raw_cs)
92 print '%s visiting %s\%s' % (cnt, full_uri, i)
85 s = time.time()
93 s = time.time()
86 f = o.open(full_uri)
94 f = o.open(full_uri)
87 size = len(f.read())
95 size = len(f.read())
@@ -93,11 +101,11 b' def test_changeset_walk(limit=None):'
93 print 'average on req', total_time / float(cnt)
101 print 'average on req', total_time / float(cnt)
94
102
95
103
96 def test_files_walk(limit=100):
104 def test_files_walk(proj, limit=100):
97 print 'processing', jn(PROJECT_PATH, PROJECT)
105 print 'processing', jn(PROJECT_PATH, proj)
98 total_time = 0
106 total_time = 0
99
107
100 repo = vcs.get_repo(jn(PROJECT_PATH, PROJECT))
108 repo = vcs.get_repo(jn(PROJECT_PATH, proj))
101
109
102 from rhodecode.lib.compat import OrderedSet
110 from rhodecode.lib.compat import OrderedSet
103 from rhodecode.lib.vcs.exceptions import RepositoryError
111 from rhodecode.lib.vcs.exceptions import RepositoryError
@@ -124,22 +132,24 b' def test_files_walk(limit=100):'
124 if limit and limit == cnt:
132 if limit and limit == cnt:
125 break
133 break
126
134
127 file_path = '/'.join((PROJECT, 'files', 'tip', f))
135 file_path = '/'.join((proj, 'files', 'tip', f))
128
129 full_uri = (BASE_URI % file_path)
136 full_uri = (BASE_URI % file_path)
137 print '%s visiting %s' % (cnt, full_uri)
130 s = time.time()
138 s = time.time()
131 f = o.open(full_uri)
139 f = o.open(full_uri)
132 size = len(f.read())
140 size = len(f.read())
133 e = time.time() - s
141 e = time.time() - s
134 total_time += e
142 total_time += e
135 print '%s visited %s size:%s req:%s ms' % (cnt, full_uri, size, e)
143 print '%s visited OK size:%s req:%s ms' % (cnt, size, e)
136
144
137 print 'total_time', total_time
145 print 'total_time', total_time
138 print 'average on req', total_time / float(cnt)
146 print 'average on req', total_time / float(cnt)
139
147
148 if __name__ == '__main__':
140
149
141 test_changelog_walk(40)
150 for p in PROJECTS:
142 time.sleep(2)
151 test_changelog_walk(p, 40)
143 test_changeset_walk(limit=100)
152 time.sleep(2)
144 time.sleep(2)
153 test_changeset_walk(p, limit=100)
145 test_files_walk(100)
154 time.sleep(2)
155 test_files_walk(p, 100)
General Comments 0
You need to be logged in to leave comments. Login now