##// END OF EJS Templates
extended safe_str and safe_unicode with chardet fallback....
marcink -
r1490:76b358f8 beta
parent child Browse files
Show More
@@ -1,364 +1,386 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 rhodecode.lib.__init__
4 4 ~~~~~~~~~~~~~~~~~~~~~~~
5 5
6 6 Some simple helper functions
7 7
8 8 :created_on: Jan 5, 2011
9 9 :author: marcink
10 10 :copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
11 11 :license: GPLv3, see COPYING for more details.
12 12 """
13 13 # This program is free software: you can redistribute it and/or modify
14 14 # it under the terms of the GNU General Public License as published by
15 15 # the Free Software Foundation, either version 3 of the License, or
16 16 # (at your option) any later version.
17 17 #
18 18 # This program is distributed in the hope that it will be useful,
19 19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 21 # GNU General Public License for more details.
22 22 #
23 23 # You should have received a copy of the GNU General Public License
24 24 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 25
26 26
27 27 try:
28 28 import json
29 29 except ImportError:
30 30 #python 2.5 compatibility
31 31 import simplejson as json
32 32
33 33
def __get_lem():
    """
    Builds the extension -> lexer names map out of the pygments ``LEXERS``
    registry.

    Each pattern found in the second-to-last field of a registry entry
    (presumably the lexer's filename globs -- verify against pygments) is
    stripped of leading ``*`` and ``.`` characters and lower-cased. A
    character class such as ``php[345]`` is expanded into one extension per
    listed character (``php3``, ``php4``, ``php5``).

    :rtype: dict
    :returns: dict mapping an extension to the list of lexer names (registry
        key with ``Lexer`` removed) that declare it
    """
    from pygments import lexers
    from collections import defaultdict

    ext_map = defaultdict(list)

    def expand(pattern):
        # returns the list of lower-cased extensions described by one pattern
        stem = pattern.lstrip('*').lstrip('.')
        open_at = stem.find('[')
        if open_at == -1:
            return [stem.lower()]

        close_at = stem.find(']')
        prefix = stem[:open_at]
        return [(prefix + char).lower()
                for char in stem[open_at + 1:close_at]]

    for lexer_name, lexer_info in sorted(lexers.LEXERS.items()):
        label = lexer_name.replace('Lexer', '')
        for pattern in lexer_info[-2]:
            for ext in expand(pattern):
                ext_map[ext].append(label)

    return dict(ext_map)
64 64
65 65 # language map is also used by whoosh indexer, which for those specified
66 66 # extensions will index its content
67 67 LANGUAGES_EXTENSIONS_MAP = __get_lem()
68 68
69 69 # Additional mappings that are not present in the pygments lexers
70 70 # NOTE: this will override any mappings in LANGUAGES_EXTENSIONS_MAP
71 71 ADDITIONAL_MAPPINGS = {'xaml': 'XAML'}
72 72
73 73 LANGUAGES_EXTENSIONS_MAP.update(ADDITIONAL_MAPPINGS)
74 74
75 75
def str2bool(_str):
    """
    Returns True/False value from given string, it tries to translate the
    string into boolean

    ``None`` gives ``False``. Values equal to ``True``/``False`` are returned
    as real booleans. Anything else is converted with ``str()``, stripped,
    lower-cased and compared with the accepted "true" spellings.

    :param _str: string value to translate into boolean
    :rtype: boolean
    :returns: boolean from given string
    """
    if _str is None:
        return False
    if _str in (True, False):
        # 0, 1, 0.0 and 1.0 also compare equal to booleans; coerce so that the
        # caller always receives a real bool instead of the number itself
        return bool(_str)
    return str(_str).strip().lower() in ('t', 'true', 'y', 'yes', 'on', '1')
91 91
92 92
def convert_line_endings(line, mode):
    """
    Converts a given line "line end" accordingly to given mode

    Available modes are::
        0 - Unix
        1 - Mac
        2 - DOS

    Any other mode returns the line unchanged.

    :param line: given line to convert
    :param mode: mode to convert to
    :rtype: str
    :return: converted line according to mode
    """
    # string methods replace the deprecated ``string.replace`` module function
    if mode == 0:
        return line.replace('\r\n', '\n').replace('\r', '\n')
    if mode == 1:
        return line.replace('\r\n', '\r').replace('\n', '\r')
    if mode == 2:
        import re
        # a lone '\r' (not followed by '\n') or a lone '\n' (not preceded by
        # '\r') becomes '\r\n'; existing '\r\n' pairs are left alone
        return re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", line)
    return line
119 119
120 120
def detect_mode(line, default):
    """
    Detects the line break used at the end of given line, falling back to
    given default when the line has no recognised line break

    :param line: str line
    :param default: value returned when no line break is found
    :rtype: int
    :return: line end mode, one of 0 - Unix, 1 - Mac, 2 - DOS
    """
    # '\r\n' has to be probed first, such a line also ends with '\n'
    for ending, mode in (('\r\n', 2), ('\n', 0), ('\r', 1)):
        if line.endswith(ending):
            return mode
    return default
139 139
140 140
def generate_api_key(username, salt=None):
    """
    Generates unique API key for given username, if salt is not given
    it'll be generated from random bytes supplied by the operating system

    :param username: username as string
    :param salt: salt to hash generate KEY
    :rtype: str
    :returns: sha1 hex digest of username+salt
    """
    import binascii
    import hashlib
    import os

    if salt is None:
        # os.urandom instead of the private tempfile._RandomNameSequence
        # helper: the key is a credential, so the salt should not come from a
        # general purpose pseudo random generator
        salt = binascii.hexlify(os.urandom(16))

    text_type = type(u'')
    digest = hashlib.sha1()
    for part in (username, salt):
        # hashing works on bytes; encode text explicitly so that non-ascii
        # usernames do not blow up on an implicit ascii conversion
        if isinstance(part, text_type):
            part = part.encode('utf8')
        digest.update(part)

    return digest.hexdigest()
158 158
159 159
def safe_unicode(str_, from_encoding='utf8'):
    """
    safe unicode function. Does few tricks to turn str_ into unicode

    In case of UnicodeDecodeError we try to return it with encoding detected
    by chardet library, if that fails we fall back to unicode with errors
    replaced

    :param str_: string to decode
    :param from_encoding: encoding tried first and used for the final
        ``replace`` fallback
    :rtype: unicode
    :returns: unicode object
    """
    # the text type: ``unicode`` on python 2 (``str`` on python 3)
    text_type = type(u'')

    if isinstance(str_, text_type):
        return str_

    try:
        return text_type(str_, from_encoding)
    except UnicodeDecodeError:
        pass

    try:
        import chardet
    except ImportError:
        # chardet is optional, without it we go straight to the fallback
        encoding = None
    else:
        encoding = chardet.detect(str_)['encoding']

    if encoding is not None:
        try:
            return str_.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # wrong guess, or an encoding name python has no codec for
            pass

    # NOTE: previously an undetected encoding did ``raise
    # UnicodeDecodeError()``, which is a TypeError (the exception requires
    # arguments) and escaped the handler instead of reaching this fallback
    return text_type(str_, from_encoding, 'replace')
180 189
def safe_str(unicode_, to_encoding='utf8'):
    """
    safe str function. Does few tricks to turn unicode_ into string

    In case of UnicodeEncodeError we try to return it with encoding detected
    by chardet library, if that fails we fall back to string with errors
    replaced

    :param unicode_: unicode to encode
    :param to_encoding: encoding used for the final ``replace`` fallback
    :rtype: str
    :returns: str object
    """

    if isinstance(unicode_, str):
        return unicode_

    try:
        return str(unicode_)
    except UnicodeEncodeError:
        pass

    encoding = None
    try:
        import chardet
        encoding = chardet.detect(unicode_)['encoding']
    except (ImportError, TypeError, UnicodeError):
        # chardet is optional; detection is best effort only and may refuse
        # or choke on input that is not a byte string
        pass

    if encoding is not None:
        try:
            return unicode_.encode(encoding)
        except (UnicodeEncodeError, LookupError):
            # wrong guess, or an encoding name python has no codec for
            pass

    # NOTE: previously an undetected encoding did ``raise
    # UnicodeEncodeError()``, which is a TypeError (the exception requires
    # arguments) and escaped the handler instead of reaching this fallback
    return unicode_.encode(to_encoding, 'replace')
200 222
201 223
202 224
def engine_from_config(configuration, prefix='sqlalchemy.', **kwargs):
    """
    Custom engine_from_config function that makes sure we use NullPool for
    file based sqlite databases. This prevents errors on sqlite. This only
    applies to sqlalchemy versions < 0.7.0

    For newer sqlalchemy versions the engine is created unchanged and, when
    ``configuration['debug']`` is set, cursor execute events are hooked to log
    the start of every query and the time it took.

    :param configuration: config mapping, read for ``prefix + 'url'`` (old
        sqlalchemy only) and ``'debug'`` (new sqlalchemy only)
    :param prefix: prefix of the sqlalchemy keys inside configuration
    :param kwargs: extra arguments passed to sqlalchemy's engine_from_config
    :returns: sqlalchemy engine
    """
    import logging
    import re
    import sqlalchemy
    from sqlalchemy import engine_from_config as efc

    # compare (major, minor) together: looking at the minor number alone
    # classified e.g. 1.3 as older than 0.7
    found = re.match(r'(\d+)\.(\d+)', sqlalchemy.__version__)
    if found:
        version = (int(found.group(1)), int(found.group(2)))
    else:
        # unparsable version string, stay on the conservative legacy path
        version = (0, 0)

    if version < (0, 7):

        # This solution should work for sqlalchemy < 0.7.0, and should use
        # proxy=TimerProxy() for execution time profiling

        from sqlalchemy.pool import NullPool
        url = configuration[prefix + 'url']

        if url.startswith('sqlite'):
            kwargs.update({'poolclass': NullPool})
        return efc(configuration, prefix, **kwargs)

    import time
    from sqlalchemy import event

    log = logging.getLogger('sqlalchemy.engine')
    YELLOW = 33  # ANSI foreground colour code
    engine = efc(configuration, prefix, **kwargs)

    def color_sql(sql):
        COLOR_SEQ = "\033[1;%dm"
        COLOR_SQL = YELLOW
        normal = '\x1b[0m'
        return ''.join([COLOR_SEQ % COLOR_SQL, sql, normal])

    if configuration['debug']:
        #attach events only for debug configuration

        def before_cursor_execute(conn, cursor, statement,
                                  parameters, context, executemany):
            # remember the start time on the execution context so that the
            # matching "after" hook can compute the elapsed time
            context._query_start_time = time.time()
            log.info(color_sql(">>>>> STARTING QUERY >>>>>"))

        def after_cursor_execute(conn, cursor, statement,
                                 parameters, context, executemany):
            total = time.time() - context._query_start_time
            log.info(color_sql("<<<<< TOTAL TIME: %f <<<<<" % total))

        event.listen(engine, "before_cursor_execute",
                     before_cursor_execute)
        event.listen(engine, "after_cursor_execute",
                     after_cursor_execute)

    return engine
260 282
261 283
def age(curdate):
    """
    Turns a datetime into a human readable age string, measured against
    ``datetime.now()``.

    The first (largest) unit that fits into the elapsed time is located and
    the next smaller unit (seconds at the smallest) is handed to
    ``time_ago_in_words`` as its second argument.

    :param curdate: datetime object
    :rtype: unicode
    :returns: unicode words describing age, ``''`` for an empty curdate and
        ``just now`` when less than a second has elapsed
    """

    from datetime import datetime
    from webhelpers.date import time_ago_in_words

    # identity function, presumably a stand-in for a translation call
    _ = lambda s: s

    if not curdate:
        return ''

    seconds_in_day = 3600 * 24
    agescales = [(_(u"year"), seconds_in_day * 365),
                 (_(u"month"), seconds_in_day * 30),
                 (_(u"day"), seconds_in_day),
                 (_(u"hour"), 3600),
                 (_(u"minute"), 60),
                 (_(u"second"), 1), ]
    smallest = len(agescales) - 1

    elapsed = datetime.now() - curdate
    elapsed_seconds = elapsed.days * seconds_in_day + elapsed.seconds

    for idx, scale in enumerate(agescales):
        if scale[1] > elapsed_seconds:
            continue
        # one unit finer than the matched scale, capped at "second"
        granularity = agescales[min(idx + 1, smallest)][0]
        return '%s %s' % (time_ago_in_words(curdate, granularity), _('ago'))

    return _(u'just now')
297 319
298 320
def uri_filter(uri):
    """
    Removes user:password from given url string and splits it into parts

    :param uri:
    :rtype: list
    :returns: list with the non-empty parts out of [proto, host, port];
        an empty string when uri is empty. Only ``https://`` and ``http://``
        are recognised as proto, and everything after the first ``:`` that
        follows the credentials is returned as port.
    """
    if not uri:
        return ''

    proto = ''

    for pat in ('https://', 'http://'):
        if uri.startswith(pat):
            uri = uri[len(pat):]
            proto = pat
            break

    # remove passwords and username: drop everything up to the first '@'
    # (find() gives -1 when there is none, which keeps the whole string)
    uri = uri[uri.find('@') + 1:]

    # get the port
    host, _sep, port = uri.partition(':')

    # build a real list explicitly, credentials_filter indexes and mutates it
    return [part for part in (proto, host, port) if part]


def credentials_filter(uri):
    """
    Returns a url with removed credentials

    :param uri:
    :rtype: str
    :returns: proto, host and ``:port`` (whichever are present) joined back
        into one string
    """

    parts = uri_filter(uri)
    if not parts:
        return ''

    # uri_filter drops empty parts, so the port is third only when a proto
    # was found; otherwise it directly follows the host. Assuming index 2
    # unconditionally lost the ':' for urls without a proto.
    if parts[0] in ('https://', 'http://'):
        port_pos = 2
    else:
        port_pos = 1

    #check if we have port
    if len(parts) > port_pos:
        parts[port_pos] = ':' + parts[port_pos]

    return ''.join(parts)
344 366
def get_changeset_safe(repo, rev):
    """
    Safe version of get_changeset, if this changeset doesn't exist for a
    repo it returns a Dummy one instead

    :param repo: repository object, must be a ``BaseRepository`` instance
    :param rev: revision to fetch
    :returns: the changeset for rev, or an ``EmptyChangeset`` created with
        ``requested_revision=rev`` when the repo raises ``RepositoryError``
    :raises Exception: when repo is not a ``BaseRepository`` instance
    """
    from vcs.backends.base import BaseRepository
    from vcs.exceptions import RepositoryError
    if not isinstance(repo, BaseRepository):
        # interpolate with %: passing the type as a second argument left the
        # '%s' placeholder unformatted in the error message
        raise Exception('You must pass an Repository '
                        'object as first argument got %s' % type(repo))

    try:
        cs = repo.get_changeset(rev)
    except RepositoryError:
        # imported lazily, as in the original code
        from rhodecode.lib.utils import EmptyChangeset
        cs = EmptyChangeset(requested_revision=rev)
    return cs
General Comments 0
You need to be logged in to leave comments. Login now