##// END OF EJS Templates
extended safe_str and safe_unicode with chardet fallback....
marcink -
r1490:76b358f8 beta
parent child Browse files
Show More
@@ -1,364 +1,386 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 rhodecode.lib.__init__
3 rhodecode.lib.__init__
4 ~~~~~~~~~~~~~~~~~~~~~~~
4 ~~~~~~~~~~~~~~~~~~~~~~~
5
5
6 Some simple helper functions
6 Some simple helper functions
7
7
8 :created_on: Jan 5, 2011
8 :created_on: Jan 5, 2011
9 :author: marcink
9 :author: marcink
10 :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
10 :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
11 :license: GPLv3, see COPYING for more details.
11 :license: GPLv3, see COPYING for more details.
12 """
12 """
13 # This program is free software: you can redistribute it and/or modify
13 # This program is free software: you can redistribute it and/or modify
14 # it under the terms of the GNU General Public License as published by
14 # it under the terms of the GNU General Public License as published by
15 # the Free Software Foundation, either version 3 of the License, or
15 # the Free Software Foundation, either version 3 of the License, or
16 # (at your option) any later version.
16 # (at your option) any later version.
17 #
17 #
18 # This program is distributed in the hope that it will be useful,
18 # This program is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 # GNU General Public License for more details.
21 # GNU General Public License for more details.
22 #
22 #
23 # You should have received a copy of the GNU General Public License
23 # You should have received a copy of the GNU General Public License
24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25
25
26
26
27 try:
27 try:
28 import json
28 import json
29 except ImportError:
29 except ImportError:
30 #python 2.5 compatibility
30 #python 2.5 compatibility
31 import simplejson as json
31 import simplejson as json
32
32
33
33
34 def __get_lem():
34 def __get_lem():
35 from pygments import lexers
35 from pygments import lexers
36 from string import lower
36 from string import lower
37 from collections import defaultdict
37 from collections import defaultdict
38
38
39 d = defaultdict(lambda: [])
39 d = defaultdict(lambda: [])
40
40
41 def __clean(s):
41 def __clean(s):
42 s = s.lstrip('*')
42 s = s.lstrip('*')
43 s = s.lstrip('.')
43 s = s.lstrip('.')
44
44
45 if s.find('[') != -1:
45 if s.find('[') != -1:
46 exts = []
46 exts = []
47 start, stop = s.find('['), s.find(']')
47 start, stop = s.find('['), s.find(']')
48
48
49 for suffix in s[start + 1:stop]:
49 for suffix in s[start + 1:stop]:
50 exts.append(s[:s.find('[')] + suffix)
50 exts.append(s[:s.find('[')] + suffix)
51 return map(lower, exts)
51 return map(lower, exts)
52 else:
52 else:
53 return map(lower, [s])
53 return map(lower, [s])
54
54
55 for lx, t in sorted(lexers.LEXERS.items()):
55 for lx, t in sorted(lexers.LEXERS.items()):
56 m = map(__clean, t[-2])
56 m = map(__clean, t[-2])
57 if m:
57 if m:
58 m = reduce(lambda x, y: x + y, m)
58 m = reduce(lambda x, y: x + y, m)
59 for ext in m:
59 for ext in m:
60 desc = lx.replace('Lexer', '')
60 desc = lx.replace('Lexer', '')
61 d[ext].append(desc)
61 d[ext].append(desc)
62
62
63 return dict(d)
63 return dict(d)
64
64
65 # language map is also used by whoosh indexer, which for those specified
65 # language map is also used by whoosh indexer, which for those specified
66 # extensions will index its content
66 # extensions will index its content
67 LANGUAGES_EXTENSIONS_MAP = __get_lem()
67 LANGUAGES_EXTENSIONS_MAP = __get_lem()
68
68
69 # Additional mappings that are not present in the pygments lexers
69 # Additional mappings that are not present in the pygments lexers
70 # NOTE: this will override any mappings in LANGUAGES_EXTENSIONS_MAP
70 # NOTE: this will override any mappings in LANGUAGES_EXTENSIONS_MAP
71 ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
71 ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
72
72
73 LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
73 LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
74
74
75
75
76 def str2bool(_str):
76 def str2bool(_str):
77 """
77 """
78 returns True/False value from given string, it tries to translate the
78 returns True/False value from given string, it tries to translate the
79 string into boolean
79 string into boolean
80
80
81 :param _str: string value to translate into boolean
81 :param _str: string value to translate into boolean
82 :rtype: boolean
82 :rtype: boolean
83 :returns: boolean from given string
83 :returns: boolean from given string
84 """
84 """
85 if _str is None:
85 if _str is None:
86 return False
86 return False
87 if _str in (True, False):
87 if _str in (True, False):
88 return _str
88 return _str
89 _str = str(_str).strip().lower()
89 _str = str(_str).strip().lower()
90 return _str in ('t', 'true', 'y', 'yes', 'on', '1')
90 return _str in ('t', 'true', 'y', 'yes', 'on', '1')
91
91
92
92
93 def convert_line_endings(line, mode):
93 def convert_line_endings(line, mode):
94 """
94 """
95 Converts a given line "line end" accordingly to given mode
95 Converts a given line "line end" accordingly to given mode
96
96
97 Available modes are::
97 Available modes are::
98 0 - Unix
98 0 - Unix
99 1 - Mac
99 1 - Mac
100 2 - DOS
100 2 - DOS
101
101
102 :param line: given line to convert
102 :param line: given line to convert
103 :param mode: mode to convert to
103 :param mode: mode to convert to
104 :rtype: str
104 :rtype: str
105 :return: converted line according to mode
105 :return: converted line according to mode
106 """
106 """
107 from string import replace
107 from string import replace
108
108
109 if mode == 0:
109 if mode == 0:
110 line = replace(line, '\r\n', '\n')
110 line = replace(line, '\r\n', '\n')
111 line = replace(line, '\r', '\n')
111 line = replace(line, '\r', '\n')
112 elif mode == 1:
112 elif mode == 1:
113 line = replace(line, '\r\n', '\r')
113 line = replace(line, '\r\n', '\r')
114 line = replace(line, '\n', '\r')
114 line = replace(line, '\n', '\r')
115 elif mode == 2:
115 elif mode == 2:
116 import re
116 import re
117 line = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", line)
117 line = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", line)
118 return line
118 return line
119
119
120
120
121 def detect_mode(line, default):
121 def detect_mode(line, default):
122 """
122 """
123 Detects line break for given line, if line break couldn't be found
123 Detects line break for given line, if line break couldn't be found
124 given default value is returned
124 given default value is returned
125
125
126 :param line: str line
126 :param line: str line
127 :param default: default
127 :param default: default
128 :rtype: int
128 :rtype: int
129 :return: value of line end, one of 0 - Unix, 1 - Mac, 2 - DOS
129 :return: value of line end, one of 0 - Unix, 1 - Mac, 2 - DOS
130 """
130 """
131 if line.endswith('\r\n'):
131 if line.endswith('\r\n'):
132 return 2
132 return 2
133 elif line.endswith('\n'):
133 elif line.endswith('\n'):
134 return 0
134 return 0
135 elif line.endswith('\r'):
135 elif line.endswith('\r'):
136 return 1
136 return 1
137 else:
137 else:
138 return default
138 return default
139
139
140
140
141 def generate_api_key(username, salt=None):
141 def generate_api_key(username, salt=None):
142 """
142 """
143 Generates unique API key for given username, if salt is not given
143 Generates unique API key for given username, if salt is not given
144 it'll be generated from some random string
144 it'll be generated from some random string
145
145
146 :param username: username as string
146 :param username: username as string
147 :param salt: salt to hash generate KEY
147 :param salt: salt to hash generate KEY
148 :rtype: str
148 :rtype: str
149 :returns: sha1 hash from username+salt
149 :returns: sha1 hash from username+salt
150 """
150 """
151 from tempfile import _RandomNameSequence
151 from tempfile import _RandomNameSequence
152 import hashlib
152 import hashlib
153
153
154 if salt is None:
154 if salt is None:
155 salt = _RandomNameSequence().next()
155 salt = _RandomNameSequence().next()
156
156
157 return hashlib.sha1(username + salt).hexdigest()
157 return hashlib.sha1(username + salt).hexdigest()
158
158
159
159
160 def safe_unicode(_str, from_encoding='utf8'):
160 def safe_unicode(str_, from_encoding='utf8'):
161 """
161 """
162 safe unicode function. In case of UnicodeDecode error we try to return
162 safe unicode function. Does a few tricks to turn str_ into unicode
163 unicode with errors replaced
163
164 In case of UnicodeDecode error we try to return it with encoding detected
165 by chardet library; if that fails, fall back to unicode with errors replaced
164
166
165 :param _str: string to decode
167 :param str_: string to decode
166 :rtype: unicode
168 :rtype: unicode
167 :returns: unicode object
169 :returns: unicode object
168 """
170 """
169
171
170 if isinstance(_str, unicode):
172 if isinstance(str_, unicode):
171 return _str
173 return str_
172
174
173 try:
175 try:
174 u_str = unicode(_str, from_encoding)
176 return unicode(str_, from_encoding)
175 except UnicodeDecodeError:
177 except UnicodeDecodeError:
176 u_str = unicode(_str, from_encoding, 'replace')
178 pass
177
179
178 return u_str
180 try:
179
181 import chardet
182 encoding = chardet.detect(str_)['encoding']
183 if encoding is None:
184 raise UnicodeDecodeError()
185
186 return str_.decode(encoding)
187 except (ImportError, UnicodeDecodeError):
188 return unicode(str_, from_encoding, 'replace')
180
189
181 def safe_str(_unicode, to_encoding='utf8'):
190 def safe_str(unicode_, to_encoding='utf8'):
182 """
191 """
183 safe str function. In case of UnicodeEncode error we try to return
192 safe str function. Does a few tricks to turn unicode_ into string
184 str with errors replaced
193
194 In case of UnicodeEncodeError we try to return it with encoding detected
195 by chardet library; if that fails, fall back to string with errors replaced
185
196
186 :param _unicode: unicode to encode
197 :param unicode_: unicode to encode
187 :rtype: str
198 :rtype: str
188 :returns: str object
199 :returns: str object
189 """
200 """
190
201
191 if isinstance(_unicode, str):
202 if isinstance(unicode_, str):
192 return _unicode
203 return unicode_
193
204
194 try:
205 try:
195 safe_str = str(_unicode)
206 return str(unicode_)
196 except UnicodeEncodeError:
207 except UnicodeEncodeError:
197 safe_str = _unicode.encode(to_encoding, 'replace')
208 pass
209
210 try:
211 import chardet
212 encoding = chardet.detect(unicode_)['encoding']
213 print encoding
214 if encoding is None:
215 raise UnicodeEncodeError()
216
217 return unicode_.encode(encoding)
218 except (ImportError, UnicodeEncodeError):
219 return unicode_.encode(to_encoding, 'replace')
198
220
199 return safe_str
221 return safe_str
200
222
201
223
202
224
203 def engine_from_config(configuration, prefix='sqlalchemy.', **kwargs):
225 def engine_from_config(configuration, prefix='sqlalchemy.', **kwargs):
204 """
226 """
205 Custom engine_from_config function that makes sure we use NullPool for
227 Custom engine_from_config function that makes sure we use NullPool for
206 file based sqlite databases. This prevents errors on sqlite. This only
228 file based sqlite databases. This prevents errors on sqlite. This only
207 applies to sqlalchemy versions < 0.7.0
229 applies to sqlalchemy versions < 0.7.0
208
230
209 """
231 """
210 import sqlalchemy
232 import sqlalchemy
211 from sqlalchemy import engine_from_config as efc
233 from sqlalchemy import engine_from_config as efc
212 import logging
234 import logging
213
235
214 if int(sqlalchemy.__version__.split('.')[1]) < 7:
236 if int(sqlalchemy.__version__.split('.')[1]) < 7:
215
237
216 # This solution should work for sqlalchemy < 0.7.0, and should use
238 # This solution should work for sqlalchemy < 0.7.0, and should use
217 # proxy=TimerProxy() for execution time profiling
239 # proxy=TimerProxy() for execution time profiling
218
240
219 from sqlalchemy.pool import NullPool
241 from sqlalchemy.pool import NullPool
220 url = configuration[prefix + 'url']
242 url = configuration[prefix + 'url']
221
243
222 if url.startswith('sqlite'):
244 if url.startswith('sqlite'):
223 kwargs.update({'poolclass': NullPool})
245 kwargs.update({'poolclass': NullPool})
224 return efc(configuration, prefix, **kwargs)
246 return efc(configuration, prefix, **kwargs)
225 else:
247 else:
226 import time
248 import time
227 from sqlalchemy import event
249 from sqlalchemy import event
228 from sqlalchemy.engine import Engine
250 from sqlalchemy.engine import Engine
229
251
230 log = logging.getLogger('sqlalchemy.engine')
252 log = logging.getLogger('sqlalchemy.engine')
231 BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = xrange(30, 38)
253 BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = xrange(30, 38)
232 engine = efc(configuration, prefix, **kwargs)
254 engine = efc(configuration, prefix, **kwargs)
233
255
234 def color_sql(sql):
256 def color_sql(sql):
235 COLOR_SEQ = "\033[1;%dm"
257 COLOR_SEQ = "\033[1;%dm"
236 COLOR_SQL = YELLOW
258 COLOR_SQL = YELLOW
237 normal = '\x1b[0m'
259 normal = '\x1b[0m'
238 return ''.join([COLOR_SEQ % COLOR_SQL, sql, normal])
260 return ''.join([COLOR_SEQ % COLOR_SQL, sql, normal])
239
261
240 if configuration['debug']:
262 if configuration['debug']:
241 #attach events only for debug configuration
263 #attach events only for debug configuration
242
264
243 def before_cursor_execute(conn, cursor, statement,
265 def before_cursor_execute(conn, cursor, statement,
244 parameters, context, executemany):
266 parameters, context, executemany):
245 context._query_start_time = time.time()
267 context._query_start_time = time.time()
246 log.info(color_sql(">>>>> STARTING QUERY >>>>>"))
268 log.info(color_sql(">>>>> STARTING QUERY >>>>>"))
247
269
248
270
249 def after_cursor_execute(conn, cursor, statement,
271 def after_cursor_execute(conn, cursor, statement,
250 parameters, context, executemany):
272 parameters, context, executemany):
251 total = time.time() - context._query_start_time
273 total = time.time() - context._query_start_time
252 log.info(color_sql("<<<<< TOTAL TIME: %f <<<<<" % total))
274 log.info(color_sql("<<<<< TOTAL TIME: %f <<<<<" % total))
253
275
254 event.listen(engine, "before_cursor_execute",
276 event.listen(engine, "before_cursor_execute",
255 before_cursor_execute)
277 before_cursor_execute)
256 event.listen(engine, "after_cursor_execute",
278 event.listen(engine, "after_cursor_execute",
257 after_cursor_execute)
279 after_cursor_execute)
258
280
259 return engine
281 return engine
260
282
261
283
262 def age(curdate):
284 def age(curdate):
263 """
285 """
264 turns a datetime into an age string.
286 turns a datetime into an age string.
265
287
266 :param curdate: datetime object
288 :param curdate: datetime object
267 :rtype: unicode
289 :rtype: unicode
268 :returns: unicode words describing age
290 :returns: unicode words describing age
269 """
291 """
270
292
271 from datetime import datetime
293 from datetime import datetime
272 from webhelpers.date import time_ago_in_words
294 from webhelpers.date import time_ago_in_words
273
295
274 _ = lambda s:s
296 _ = lambda s:s
275
297
276 if not curdate:
298 if not curdate:
277 return ''
299 return ''
278
300
279 agescales = [(_(u"year"), 3600 * 24 * 365),
301 agescales = [(_(u"year"), 3600 * 24 * 365),
280 (_(u"month"), 3600 * 24 * 30),
302 (_(u"month"), 3600 * 24 * 30),
281 (_(u"day"), 3600 * 24),
303 (_(u"day"), 3600 * 24),
282 (_(u"hour"), 3600),
304 (_(u"hour"), 3600),
283 (_(u"minute"), 60),
305 (_(u"minute"), 60),
284 (_(u"second"), 1), ]
306 (_(u"second"), 1), ]
285
307
286 age = datetime.now() - curdate
308 age = datetime.now() - curdate
287 age_seconds = (age.days * agescales[2][1]) + age.seconds
309 age_seconds = (age.days * agescales[2][1]) + age.seconds
288 pos = 1
310 pos = 1
289 for scale in agescales:
311 for scale in agescales:
290 if scale[1] <= age_seconds:
312 if scale[1] <= age_seconds:
291 if pos == 6:pos = 5
313 if pos == 6:pos = 5
292 return '%s %s' % (time_ago_in_words(curdate,
314 return '%s %s' % (time_ago_in_words(curdate,
293 agescales[pos][0]), _('ago'))
315 agescales[pos][0]), _('ago'))
294 pos += 1
316 pos += 1
295
317
296 return _(u'just now')
318 return _(u'just now')
297
319
298
320
299 def uri_filter(uri):
321 def uri_filter(uri):
300 """
322 """
301 Removes user:password from given url string
323 Removes user:password from given url string
302
324
303 :param uri:
325 :param uri:
304 :rtype: unicode
326 :rtype: unicode
305 :returns: filtered list of strings
327 :returns: filtered list of strings
306 """
328 """
307 if not uri:
329 if not uri:
308 return ''
330 return ''
309
331
310 proto = ''
332 proto = ''
311
333
312 for pat in ('https://', 'http://'):
334 for pat in ('https://', 'http://'):
313 if uri.startswith(pat):
335 if uri.startswith(pat):
314 uri = uri[len(pat):]
336 uri = uri[len(pat):]
315 proto = pat
337 proto = pat
316 break
338 break
317
339
318 # remove passwords and username
340 # remove passwords and username
319 uri = uri[uri.find('@') + 1:]
341 uri = uri[uri.find('@') + 1:]
320
342
321 # get the port
343 # get the port
322 cred_pos = uri.find(':')
344 cred_pos = uri.find(':')
323 if cred_pos == -1:
345 if cred_pos == -1:
324 host, port = uri, None
346 host, port = uri, None
325 else:
347 else:
326 host, port = uri[:cred_pos], uri[cred_pos + 1:]
348 host, port = uri[:cred_pos], uri[cred_pos + 1:]
327
349
328 return filter(None, [proto, host, port])
350 return filter(None, [proto, host, port])
329
351
330
352
331 def credentials_filter(uri):
353 def credentials_filter(uri):
332 """
354 """
333 Returns a url with removed credentials
355 Returns a url with removed credentials
334
356
335 :param uri:
357 :param uri:
336 """
358 """
337
359
338 uri = uri_filter(uri)
360 uri = uri_filter(uri)
339 #check if we have port
361 #check if we have port
340 if len(uri) > 2 and uri[2]:
362 if len(uri) > 2 and uri[2]:
341 uri[2] = ':' + uri[2]
363 uri[2] = ':' + uri[2]
342
364
343 return ''.join(uri)
365 return ''.join(uri)
344
366
345 def get_changeset_safe(repo, rev):
367 def get_changeset_safe(repo, rev):
346 """
368 """
347 Safe version of get_changeset if this changeset doesn't exist for a
369 Safe version of get_changeset if this changeset doesn't exist for a
348 repo it returns a Dummy one instead
370 repo it returns a Dummy one instead
349
371
350 :param repo:
372 :param repo:
351 :param rev:
373 :param rev:
352 """
374 """
353 from vcs.backends.base import BaseRepository
375 from vcs.backends.base import BaseRepository
354 from vcs.exceptions import RepositoryError
376 from vcs.exceptions import RepositoryError
355 if not isinstance(repo, BaseRepository):
377 if not isinstance(repo, BaseRepository):
356 raise Exception('You must pass an Repository '
378 raise Exception('You must pass an Repository '
357 'object as first argument got %s', type(repo))
379 'object as first argument got %s', type(repo))
358
380
359 try:
381 try:
360 cs = repo.get_changeset(rev)
382 cs = repo.get_changeset(rev)
361 except RepositoryError:
383 except RepositoryError:
362 from rhodecode.lib.utils import EmptyChangeset
384 from rhodecode.lib.utils import EmptyChangeset
363 cs = EmptyChangeset(requested_revision=rev)
385 cs = EmptyChangeset(requested_revision=rev)
364 return cs No newline at end of file
386 return cs
General Comments 0
You need to be logged in to leave comments. Login now