##// END OF EJS Templates
improvements for rhodecode crawler
marcink -
r2226:ce04e6ef beta
parent child Browse files
Show More
@@ -1,159 +1,184 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.tests.test_crawer
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Test for crawling a project for memory usage
7 7 This should be run as a regular script, together
8 8 with a watch script that will show memory usage.
9 9
10 10 watch -n1 ./rhodecode/tests/mem_watch
11 11
12 12 :created_on: Apr 21, 2010
13 13 :author: marcink
14 14 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
15 15 :license: GPLv3, see COPYING for more details.
16 16 """
17 17 # This program is free software: you can redistribute it and/or modify
18 18 # it under the terms of the GNU General Public License as published by
19 19 # the Free Software Foundation, either version 3 of the License, or
20 20 # (at your option) any later version.
21 21 #
22 22 # This program is distributed in the hope that it will be useful,
23 23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 25 # GNU General Public License for more details.
26 26 #
27 27 # You should have received a copy of the GNU General Public License
28 28 # along with this program. If not, see <http://www.gnu.org/licenses/>.
29 29
30 30
31 31 import cookielib
32 32 import urllib
33 33 import urllib2
34 34 import time
35 35 import os
36 36 import sys
37 37 from os.path import join as jn
38 38 from os.path import dirname as dn
39 39
40 40 __here__ = os.path.abspath(__file__)
41 41 __root__ = dn(dn(dn(__here__)))
42 42 sys.path.append(__root__)
43 43
44 44 from rhodecode.lib import vcs
45 from rhodecode.lib.compat import OrderedSet
46 from rhodecode.lib.vcs.exceptions import RepositoryError
45 47
# number of full crawl passes performed for every project
PASES = 3

# default address of the instance to crawl
HOST = 'http://127.0.0.1'
PORT = 5000

# exactly one command line argument replaces the default address
if len(sys.argv) == 2:
    BASE_URI = sys.argv[1]
else:
    BASE_URI = '%s:%s/' % (HOST, PORT)

if not BASE_URI.endswith('/'):
    BASE_URI += '/'

print('Crawling @ %s' % BASE_URI)
# from here on BASE_URI is a template, the placeholder takes the request path
BASE_URI += '%s'

# directory that holds the repositories listed in PROJECTS
PROJECT_PATH = jn('/', 'home', 'marcink', 'hg_repos')
PROJECTS = [
    'linux-magx-pbranch',
    'CPython',
    'rhodecode_tip',
]
53 67
54 68
# Opener with cookie handling; the crawl functions issue every request
# through ``o`` and it is also installed as the urllib2 default opener.
cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
_cookie_processor = urllib2.HTTPCookieProcessor(cj)
o = urllib2.build_opener(_cookie_processor)
o.addheaders = [
    ('User-agent', 'rhodecode-crawler'),
    ('Accept-Language', 'en - us, en;q = 0.5'),
]

urllib2.install_opener(o)
63 77
64 78
def _get_repo(proj):
    """
    Normalize ``proj`` into a ``(repo, name)`` pair.

    :param proj: either a project name (a string, joined with
        ``PROJECT_PATH`` and opened through ``vcs.get_repo``) or an
        already opened repository object that has a ``name`` attribute
    :returns: tuple of the repository object and the project name
    """
    if not isinstance(proj, basestring):
        # already a repository object, its name serves as the project name
        return proj, proj.name

    return vcs.get_repo(jn(PROJECT_PATH, proj)), proj
88
89
def test_changelog_walk(proj, pages=100):
    """
    Request the changelog pages of a project and print timing for each.

    Pages ``1`` to ``pages - 1`` are requested (``range(1, pages)``).

    :param proj: project name or repository object, see ``_get_repo``
    :param pages: exclusive upper bound of the requested page numbers
    """
    # only the project name is needed here; the repository object is unused
    _repo, proj = _get_repo(proj)

    # the changelog path does not depend on the page number
    page = '/'.join((proj, 'changelog',))

    total_time = 0
    visited = 0
    for i in range(1, pages):
        full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page': i})
        s = time.time()
        f = o.open(full_uri)
        try:
            size = len(f.read())
            e = time.time() - s
        finally:
            # release the connection even when reading fails
            f.close()
        total_time += e
        visited += 1
        # NOTE(review): ``e`` comes from time.time() differences, i.e. it is
        # in seconds although the message says "ms"
        print('visited %s size:%s req:%s ms' % (full_uri, size, e))

    print('total_time %s' % total_time)
    # average over the requests actually made (``pages - 1`` of them, not
    # ``pages``); skipped when nothing was requested to avoid dividing by 0
    if visited:
        print('average on req %s' % (total_time / float(visited)))
81 108
82 109
def test_changeset_walk(proj, limit=None):
    """
    Request the changeset page of each changeset obtained by iterating
    the repository and print timing for each request.

    :param proj: project name or repository object, see ``_get_repo``
    :param limit: maximum number of changesets to request; a false value
        (``None``, ``0``) means no limit
    """
    repo, proj = _get_repo(proj)

    print('processing %s' % jn(PROJECT_PATH, proj))
    total_time = 0

    cnt = 0
    for cs in repo:
        # check the limit before counting, so ``cnt`` only ever counts
        # changesets that are really requested and exactly ``limit`` of
        # them are visited
        if limit and cnt >= limit:
            break
        cnt += 1

        raw_cs = '/'.join((proj, 'changeset', cs.raw_id))
        full_uri = (BASE_URI % raw_cs)
        print('%s visiting %s\\%s' % (cnt, full_uri, cs))
        s = time.time()
        f = o.open(full_uri)
        try:
            size = len(f.read())
            e = time.time() - s
        finally:
            # release the connection even when reading fails
            f.close()
        total_time += e
        # NOTE(review): ``e`` is in seconds although the message says "ms"
        print('%s visited %s\\%s size:%s req:%s ms' % (cnt, full_uri, cs, size, e))

    print('total_time %s' % total_time)
    # an empty repository yields no requests; avoid dividing by zero
    if cnt:
        print('average on req %s' % (total_time / float(cnt)))
106 134
107 135
def test_files_walk(proj, limit=100):
    """
    Request the ``files`` page at ``tip`` for paths collected from the
    repository and print timing for each request.

    The path set starts with the empty path and is extended with the
    paths of the nodes yielded by ``tip.walk('/')``.

    :param proj: project name or repository object, see ``_get_repo``
    :param limit: maximum number of paths to request; a false value
        (``None``, ``0``) means no limit
    """
    repo, proj = _get_repo(proj)

    print('processing %s' % jn(PROJECT_PATH, proj))
    total_time = 0

    # ordered, so paths are requested in the order they were discovered
    paths_ = OrderedSet([''])
    try:
        tip = repo.get_changeset('tip')
        for topnode, dirs, files in tip.walk('/'):

            for dir_node in dirs:
                paths_.add(dir_node.path)
                for node in dir_node:
                    paths_.add(node.path)

            for file_node in files:
                paths_.add(file_node.path)

    except RepositoryError:
        # best effort: keep crawling whatever was collected so far, but
        # say so instead of failing silently
        print('could not walk tip of %s: %s' % (proj, sys.exc_info()[1]))

    cnt = 0
    for path in paths_:
        # check the limit before counting, so ``cnt`` only ever counts
        # paths that are really requested and exactly ``limit`` of them
        # are visited
        if limit and cnt >= limit:
            break
        cnt += 1

        file_path = '/'.join((proj, 'files', 'tip', path))
        full_uri = (BASE_URI % file_path)
        print('%s visiting %s' % (cnt, full_uri))
        s = time.time()
        response = o.open(full_uri)
        try:
            size = len(response.read())
            e = time.time() - s
        finally:
            # release the connection even when reading fails
            response.close()
        total_time += e
        # NOTE(review): ``e`` is in seconds although the message says "ms"
        print('%s visited OK size:%s req:%s ms' % (cnt, size, e))

    print('total_time %s' % total_time)
    # guard against dividing by zero when nothing was requested
    if cnt:
        print('average on req %s' % (total_time / float(cnt)))
151 176
if __name__ == '__main__':
    # Each repository is opened once and reused for all passes and walks.
    for path in PROJECTS:
        repo = vcs.get_repo(jn(PROJECT_PATH, path))
        for i in range(PASES):
            # 1-based pass number, so the last pass reads PASES/PASES
            print('PASS %s/%s' % (i + 1, PASES))
            test_changelog_walk(repo, pages=80)
            test_changeset_walk(repo, limit=100)
            test_files_walk(repo, limit=100)
General Comments 0
You need to be logged in to leave comments. Login now