##// END OF EJS Templates
small improvements in rhodecode_crawler
marcink -
r2211:c00ab8b2 beta
parent child Browse files
Show More
@@ -1,145 +1,155
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.tests.test_crawler
4 4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Test for crawling a project for memory usage
7 7 This should be run as a regular script, together
8 8 with a watch script that will show memory usage.
9 9
10 10 watch -n1 ./rhodecode/tests/mem_watch
11 11
12 12 :created_on: Apr 21, 2010
13 13 :author: marcink
14 14 :copyright: (C) 2010-2012 Marcin Kuzminski <marcin@python-works.com>
15 15 :license: GPLv3, see COPYING for more details.
16 16 """
17 17 # This program is free software: you can redistribute it and/or modify
18 18 # it under the terms of the GNU General Public License as published by
19 19 # the Free Software Foundation, either version 3 of the License, or
20 20 # (at your option) any later version.
21 21 #
22 22 # This program is distributed in the hope that it will be useful,
23 23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 25 # GNU General Public License for more details.
26 26 #
27 27 # You should have received a copy of the GNU General Public License
28 28 # along with this program. If not, see <http://www.gnu.org/licenses/>.
29 29
30 30
31 31 import cookielib
32 32 import urllib
33 33 import urllib2
34 34 import time
35 import os
36 import sys
37 from os.path import join as jn
38 from os.path import dirname as dn
35 39
36 from os.path import join as jn
40 __here__ = os.path.abspath(__file__)
41 __root__ = dn(dn(dn(__here__)))
42 sys.path.append(__root__)
43
37 44 from rhodecode.lib import vcs
38 45
39 BASE_URI = 'http://127.0.0.1:5000/%s'
40 PROJECT = 'CPython'
46 BASE_URI = 'http://127.0.0.1:5001/%s'
41 47 PROJECT_PATH = jn('/', 'home', 'marcink', 'hg_repos')
48 PROJECTS = ['CPython', 'rhodecode_tip', 'mastergmat']
42 49
43 50
44 51 cj = cookielib.FileCookieJar('/tmp/rc_test_cookie.txt')
45 52 o = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
46 53 o.addheaders = [
47 ('User-agent', 'rhodecode-crawler'),
48 ('Accept-Language', 'en - us, en;q = 0.5')
49 ]
54 ('User-agent', 'rhodecode-crawler'),
55 ('Accept-Language', 'en - us, en;q = 0.5')
56 ]
50 57
51 58 urllib2.install_opener(o)
52 59
53 60
54 def test_changelog_walk(pages=100):
61 def test_changelog_walk(proj, pages=100):
55 62 total_time = 0
56 63 for i in range(1, pages):
57 64
58 page = '/'.join((PROJECT, 'changelog',))
65 page = '/'.join((proj, 'changelog',))
59 66
60 67 full_uri = (BASE_URI % page) + '?' + urllib.urlencode({'page':i})
61 68 s = time.time()
62 69 f = o.open(full_uri)
63 70 size = len(f.read())
64 71 e = time.time() - s
65 72 total_time += e
66 73 print 'visited %s size:%s req:%s ms' % (full_uri, size, e)
67 74
68 75 print 'total_time', total_time
69 76 print 'average on req', total_time / float(pages)
70 77
71 78
72 def test_changeset_walk(limit=None):
73 print 'processing', jn(PROJECT_PATH, PROJECT)
79 def test_changeset_walk(proj, limit=None):
80 print 'processing', jn(PROJECT_PATH, proj)
74 81 total_time = 0
75 82
76 repo = vcs.get_repo(jn(PROJECT_PATH, PROJECT))
83 repo = vcs.get_repo(jn(PROJECT_PATH, proj))
77 84 cnt = 0
78 85 for i in repo:
79 86 cnt += 1
80 raw_cs = '/'.join((PROJECT, 'changeset', i.raw_id))
87 raw_cs = '/'.join((proj, 'changeset', i.raw_id))
81 88 if limit and limit == cnt:
82 89 break
83 90
84 91 full_uri = (BASE_URI % raw_cs)
92 print '%s visiting %s\%s' % (cnt, full_uri, i)
85 93 s = time.time()
86 94 f = o.open(full_uri)
87 95 size = len(f.read())
88 96 e = time.time() - s
89 97 total_time += e
90 98 print '%s visited %s\%s size:%s req:%s ms' % (cnt, full_uri, i, size, e)
91 99
92 100 print 'total_time', total_time
93 101 print 'average on req', total_time / float(cnt)
94 102
95 103
96 def test_files_walk(limit=100):
97 print 'processing', jn(PROJECT_PATH, PROJECT)
104 def test_files_walk(proj, limit=100):
105 print 'processing', jn(PROJECT_PATH, proj)
98 106 total_time = 0
99 107
100 repo = vcs.get_repo(jn(PROJECT_PATH, PROJECT))
108 repo = vcs.get_repo(jn(PROJECT_PATH, proj))
101 109
102 110 from rhodecode.lib.compat import OrderedSet
103 111 from rhodecode.lib.vcs.exceptions import RepositoryError
104 112
105 113 paths_ = OrderedSet([''])
106 114 try:
107 115 tip = repo.get_changeset('tip')
108 116 for topnode, dirs, files in tip.walk('/'):
109 117
110 118 for dir in dirs:
111 119 paths_.add(dir.path)
112 120 for f in dir:
113 121 paths_.add(f.path)
114 122
115 123 for f in files:
116 124 paths_.add(f.path)
117 125
118 126 except RepositoryError, e:
119 127 pass
120 128
121 129 cnt = 0
122 130 for f in paths_:
123 131 cnt += 1
124 132 if limit and limit == cnt:
125 133 break
126 134
127 file_path = '/'.join((PROJECT, 'files', 'tip', f))
128
135 file_path = '/'.join((proj, 'files', 'tip', f))
129 136 full_uri = (BASE_URI % file_path)
137 print '%s visiting %s' % (cnt, full_uri)
130 138 s = time.time()
131 139 f = o.open(full_uri)
132 140 size = len(f.read())
133 141 e = time.time() - s
134 142 total_time += e
135 print '%s visited %s size:%s req:%s ms' % (cnt, full_uri, size, e)
143 print '%s visited OK size:%s req:%s ms' % (cnt, size, e)
136 144
137 145 print 'total_time', total_time
138 146 print 'average on req', total_time / float(cnt)
139 147
148 if __name__ == '__main__':
140 149
141 test_changelog_walk(40)
142 time.sleep(2)
143 test_changeset_walk(limit=100)
144 time.sleep(2)
145 test_files_walk(100)
150 for p in PROJECTS:
151 test_changelog_walk(p, 40)
152 time.sleep(2)
153 test_changeset_walk(p, limit=100)
154 time.sleep(2)
155 test_files_walk(p, 100)
General Comments 0
You need to be logged in to leave comments. Login now