##// END OF EJS Templates
improvements for rhodecode crawler
marcink -
r2226:ce04e6ef beta
parent child Browse files
Show More
@@ -1,159 +1,184 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 rhodecode.tests.test_crawer
3 rhodecode.tests.test_crawer
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
5
5
6 Test for crawling a project for memory usage
6 Test for crawling a project for memory usage
7 This should be run as a regular script, together
7 This should be run as a regular script, together
8 with a watch script that will show memory usage.
8 with a watch script that will show memory usage.
9
9
10 watch -n1 ./rhodecode/tests/mem_watch
10 watch -n1 ./rhodecode/tests/mem_watch
11
11
12 :created_on: Apr 21, 2010
12 :created_on: Apr 21, 2010
13 :author: marcink
13 :author: marcink
14 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
14 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
15 :license: GPLv3, see COPYING for more details.
15 :license: GPLv3, see COPYING for more details.
16 """
16 """
17 # This program is free software: you can redistribute it and/or modify
17 # This program is free software: you can redistribute it and/or modify
18 # it under the terms of the GNU General Public License as published by
18 # it under the terms of the GNU General Public License as published by
19 # the Free Software Foundation, either version 3 of the License, or
19 # the Free Software Foundation, either version 3 of the License, or
20 # (at your option) any later version.
20 # (at your option) any later version.
21 #
21 #
22 # This program is distributed in the hope that it will be useful,
22 # This program is distributed in the hope that it will be useful,
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 # GNU General Public License for more details.
25 # GNU General Public License for more details.
26 #
26 #
27 # You should have received a copy of the GNU General Public License
27 # You should have received a copy of the GNU General Public License
28 # along with this program. If not, see <http://www.gnu.org/licenses/>.
28 # along with this program. If not, see <http://www.gnu.org/licenses/>.
29
29
30
30
31 import cookielib
31 import cookielib
32 import urllib
32 import urllib
33 import urllib2
33 import urllib2
34 import time
34 import time
35 import os
35 import os
36 import sys
36 import sys
37 from os.path import join as jn
37 from os.path import join as jn
38 from os.path import dirname as dn
38 from os.path import dirname as dn
39
39
40 __here__ = os.path.abspath(__file__)
40 __here__ = os.path.abspath(__file__)
41 __root__ = dn(dn(dn(__here__)))
41 __root__ = dn(dn(dn(__here__)))
42 sys.path.append(__root__)
42 sys.path.append(__root__)
43
43
44 from rhodecode.lib import vcs
44 from rhodecode.lib import vcs
45 from rhodecode.lib.compat import OrderedSet
46 from rhodecode.lib.vcs.exceptions import RepositoryError
45
47
# Number of crawl passes the ``__main__`` driver runs for every project.
PASES = 3
# Default server location, used only when no URI is given on the command line.
HOST = 'http://127.0.0.1'
PORT = 5000
BASE_URI = '%s:%s/' % (HOST, PORT)

# A single command-line argument overrides the default base URI.
if len(sys.argv) == 2:
    BASE_URI = sys.argv[1]

# Page paths are appended directly to the base, so it has to end with '/'.
if not BASE_URI.endswith('/'):
    BASE_URI += '/'

print('Crawling @ %s' % BASE_URI)
# From here on BASE_URI is a %-format template taking the page path.  Literal
# '%' characters (e.g. percent-encoded bytes in a user supplied URI) are
# doubled so that ``BASE_URI % path`` does not read them as format directives.
BASE_URI = BASE_URI.replace('%', '%%') + '%s'
# Directory that holds the local repositories listed in PROJECTS.
PROJECT_PATH = jn('/', 'home', 'marcink', 'hg_repos')
PROJECTS = [
    'linux-magx-pbranch',
    'CPython',
    'rhodecode_tip',
]
53
67
54
68
55 cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
69 cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
56 o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
70 o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
57 o.addheaders = [
71 o.addheaders = [
58 ('User-agent', 'rhodecode-crawler'),
72 ('User-agent', 'rhodecode-crawler'),
59 ('Accept-Language', 'en - us, en;q = 0.5')
73 ('Accept-Language', 'en - us, en;q = 0.5')
60 ]
74 ]
61
75
62 urllib2.install_opener(o)
76 urllib2.install_opener(o)
63
77
64
78
def _get_repo(proj):
    """
    Normalize ``proj`` into a ``(repo, name)`` pair.

    :param proj: either a project name (string), which is opened from
        ``PROJECT_PATH`` with ``vcs.get_repo``, or an already opened
        repository object exposing a ``name`` attribute
    :returns: tuple of the repository object and the project name
    """
    if not isinstance(proj, basestring):
        # already a repository object - just read the name off it
        return proj, proj.name

    return vcs.get_repo(jn(PROJECT_PATH, proj)), proj
88
89
def test_changelog_walk(proj, pages=100):
    """
    Request the changelog pages of a project one by one and print the
    size and timing of every response.

    :param proj: project name or repository object (see ``_get_repo``)
    :param pages: number of changelog pages to request; pages
        ``1 .. pages`` are fetched through the ``page`` query parameter
    """
    proj = _get_repo(proj)[1]

    # the path does not depend on the page number, so build it only once
    page = '/'.join((proj, 'changelog',))

    total_time = 0
    visited = 0
    # pages + 1: the average below is meant to cover ``pages`` requests
    for i in range(1, pages + 1):
        full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page': i})
        s = time.time()
        f = o.open(full_uri)
        try:
            size = len(f.read())
            e = time.time() - s
        finally:
            # release the connection, also when reading the body fails
            f.close()
        total_time += e
        visited += 1
        # time.time() differences are seconds, so label them as such
        print('visited %s size:%s req:%s s' % (full_uri, size, e))

    print('total_time %s' % total_time)
    # average over the requests actually made; 0.0 when there were none
    print('average on req %s' % (total_time / visited if visited else 0.0))
81
108
82
109
def test_changeset_walk(proj, limit=None):
    """
    Request the changeset page of the changesets of a project and print
    the size and timing of every response.

    :param proj: project name or repository object (see ``_get_repo``)
    :param limit: maximum number of changesets to visit; a false value
        (``None``, ``0``) means every changeset the repository yields
    """
    repo, proj = _get_repo(proj)

    print('processing %s' % jn(PROJECT_PATH, proj))
    total_time = 0

    cnt = 0
    for cs in repo:
        # check before counting, so exactly ``limit`` changesets are visited
        # and ``cnt`` always equals the number of requests made
        if limit and cnt >= limit:
            break
        cnt += 1

        raw_cs = '/'.join((proj, 'changeset', cs.raw_id))
        full_uri = (BASE_URI % raw_cs)
        # '\\' is the literal backslash separator the output always had
        print('%s visiting %s\\%s' % (cnt, full_uri, cs))
        s = time.time()
        f = o.open(full_uri)
        try:
            size = len(f.read())
            e = time.time() - s
        finally:
            # release the connection, also when reading the body fails
            f.close()
        total_time += e
        # time.time() differences are seconds, so label them as such
        print('%s visited %s\\%s size:%s req:%s s'
              % (cnt, full_uri, cs, size, e))

    print('total_time %s' % total_time)
    # average over the requests actually made; 0.0 for an empty repository
    print('average on req %s' % (total_time / cnt if cnt else 0.0))
106
134
107
135
def test_files_walk(proj, limit=100):
    """
    Collect the paths found in the ``tip`` changeset of a project, request
    the ``files`` page of each of them and print the size and timing of
    every response.

    :param proj: project name or repository object (see ``_get_repo``)
    :param limit: maximum number of paths to visit; a false value
        (``None``, ``0``) means every collected path
    """
    repo, proj = _get_repo(proj)

    print('processing %s' % jn(PROJECT_PATH, proj))
    total_time = 0

    # the empty path comes first and yields the '<proj>/files/tip/' URL
    paths_ = OrderedSet([''])
    try:
        tip = repo.get_changeset('tip')
        for topnode, dirs, files in tip.walk('/'):
            for dir_node in dirs:
                paths_.add(dir_node.path)
                for node in dir_node:
                    paths_.add(node.path)

            for file_node in files:
                paths_.add(file_node.path)
    except RepositoryError as err:
        # best effort: still crawl whatever was collected, but report why
        # the listing is incomplete instead of hiding the error
        print('could not list all files of %s: %s' % (proj, err))

    cnt = 0
    for path in paths_:
        # check before counting, so exactly ``limit`` paths are visited and
        # ``cnt`` always equals the number of requests made
        if limit and cnt >= limit:
            break
        cnt += 1

        file_path = '/'.join((proj, 'files', 'tip', path))
        full_uri = (BASE_URI % file_path)
        print('%s visiting %s' % (cnt, full_uri))
        s = time.time()
        resp = o.open(full_uri)
        try:
            size = len(resp.read())
            e = time.time() - s
        finally:
            # release the connection, also when reading the body fails
            resp.close()
        total_time += e
        # time.time() differences are seconds, so label them as such
        print('%s visited OK size:%s req:%s s' % (cnt, size, e))

    print('total_time %s' % total_time)
    # average over the requests actually made; 0.0 when there were none
    print('average on req %s' % (total_time / cnt if cnt else 0.0))
151
176
if __name__ == '__main__':
    for path in PROJECTS:
        # open each repository once and hand the same object to every walk
        repo = vcs.get_repo(jn(PROJECT_PATH, path))
        # count passes from 1 so the banner runs from "1/N" up to "N/N"
        for i in range(1, PASES + 1):
            print('PASS %s/%s' % (i, PASES))
            test_changelog_walk(repo, pages=80)
            test_changeset_walk(repo, limit=100)
            test_files_walk(repo, limit=100)
General Comments 0
You need to be logged in to leave comments. Login now