Show More
@@ -70,21 +70,21 b' def safe_int(val, default=None):' | |||||
70 |
|
70 | |||
71 | def safe_unicode(s): |
|
71 | def safe_unicode(s): | |
72 | """ |
|
72 | """ | |
73 |
Safe unicode function. Use a few tricks to turn s into |
|
73 | Safe unicode str function. Use a few tricks to turn s into str: | |
74 | In case of UnicodeDecodeError with configured default encodings, try to |
|
74 | In case of UnicodeDecodeError with configured default encodings, try to | |
75 | detect encoding with chardet library, then fall back to first encoding with |
|
75 | detect encoding with chardet library, then fall back to first encoding with | |
76 | errors replaced. |
|
76 | errors replaced. | |
77 | """ |
|
77 | """ | |
78 |
if isinstance(s, |
|
78 | if isinstance(s, str): | |
79 | return s |
|
79 | return s | |
80 |
|
80 | |||
81 |
if not isinstance(s, bytes): # use __str__ |
|
81 | if not isinstance(s, bytes): # use __str__ and don't expect UnicodeDecodeError | |
82 |
return |
|
82 | return str(s) | |
83 |
|
83 | |||
84 | from kallithea.lib.vcs.conf import settings |
|
84 | from kallithea.lib.vcs.conf import settings | |
85 | for enc in settings.DEFAULT_ENCODINGS: |
|
85 | for enc in settings.DEFAULT_ENCODINGS: | |
86 | try: |
|
86 | try: | |
87 |
return |
|
87 | return str(s, enc) | |
88 | except UnicodeDecodeError: |
|
88 | except UnicodeDecodeError: | |
89 | pass |
|
89 | pass | |
90 |
|
90 | |||
@@ -96,7 +96,7 b' def safe_unicode(s):' | |||||
96 | except (ImportError, UnicodeDecodeError): |
|
96 | except (ImportError, UnicodeDecodeError): | |
97 | pass |
|
97 | pass | |
98 |
|
98 | |||
99 |
return |
|
99 | return str(s, settings.DEFAULT_ENCODINGS[0], 'replace') | |
100 |
|
100 | |||
101 |
|
101 | |||
102 | def safe_bytes(s): |
|
102 | def safe_bytes(s): | |
@@ -108,7 +108,7 b' def safe_bytes(s):' | |||||
108 | if isinstance(s, bytes): |
|
108 | if isinstance(s, bytes): | |
109 | return s |
|
109 | return s | |
110 |
|
110 | |||
111 |
assert isinstance(s, |
|
111 | assert isinstance(s, str), repr(s) # bytes cannot coerce with __str__ or handle None or int | |
112 |
|
112 | |||
113 | from kallithea.lib.vcs.conf import settings |
|
113 | from kallithea.lib.vcs.conf import settings | |
114 | for enc in settings.DEFAULT_ENCODINGS: |
|
114 | for enc in settings.DEFAULT_ENCODINGS: | |
@@ -120,12 +120,12 b' def safe_bytes(s):' | |||||
120 | return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace') |
|
120 | return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace') | |
121 |
|
121 | |||
122 |
|
122 | |||
123 | safe_str = safe_bytes # safe_str is deprecated - it will be redefined when changing to py3 |
|
123 | safe_str = safe_unicode | |
124 |
|
124 | |||
125 |
|
125 | |||
126 | def ascii_bytes(s): |
|
126 | def ascii_bytes(s): | |
127 | """ |
|
127 | """ | |
128 |
Simple conversion from |
|
128 | Simple conversion from str to bytes, *assuming* all codepoints are | |
129 | 7-bit and it thus is pure ASCII. |
|
129 | 7-bit and it thus is pure ASCII. | |
130 | Will fail badly with UnicodeError on invalid input. |
|
130 | Will fail badly with UnicodeError on invalid input. | |
131 | This should be used where encoding and "safe" ambiguity should be avoided. |
|
131 | This should be used where encoding and "safe" ambiguity should be avoided. | |
@@ -134,17 +134,17 b' def ascii_bytes(s):' | |||||
134 | identifiers. |
|
134 | identifiers. | |
135 |
|
135 | |||
136 | >>> ascii_bytes('a') |
|
136 | >>> ascii_bytes('a') | |
137 | 'a' |
|
137 | b'a' | |
138 | >>> ascii_bytes(u'a') |
|
138 | >>> ascii_bytes(u'a') | |
139 | 'a' |
|
139 | b'a' | |
140 | >>> ascii_bytes('å') |
|
140 | >>> ascii_bytes('å') | |
141 | Traceback (most recent call last): |
|
141 | Traceback (most recent call last): | |
142 |
Unicode |
|
142 | UnicodeEncodeError: 'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128) | |
143 |
>>> ascii_bytes( |
|
143 | >>> ascii_bytes('å'.encode('utf8')) | |
144 | Traceback (most recent call last): |
|
144 | Traceback (most recent call last): | |
145 | UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128) |
|
145 | AssertionError: b'\xc3\xa5' | |
146 | """ |
|
146 | """ | |
147 |
assert isinstance(s, |
|
147 | assert isinstance(s, str), repr(s) | |
148 | return s.encode('ascii') |
|
148 | return s.encode('ascii') | |
149 |
|
149 | |||
150 |
|
150 | |||
@@ -158,23 +158,20 b' def ascii_str(s):' | |||||
158 | where a unicode string is wanted without caring about encoding. For example |
|
158 | where a unicode string is wanted without caring about encoding. For example | |
159 | to hex, base64, urlencoding, or are known to be identifiers. |
|
159 | to hex, base64, urlencoding, or are known to be identifiers. | |
160 |
|
160 | |||
161 | >>> ascii_str('a') |
|
161 | >>> ascii_str(b'a') | |
162 | 'a' |
|
162 | 'a' | |
163 | >>> ascii_str(u'a') |
|
163 | >>> ascii_str(u'a') | |
164 | Traceback (most recent call last): |
|
164 | Traceback (most recent call last): | |
165 |
AssertionError: |
|
165 | AssertionError: 'a' | |
166 | >>> ascii_str('å') |
|
166 | >>> ascii_str('å'.encode('utf8')) | |
167 | Traceback (most recent call last): |
|
167 | Traceback (most recent call last): | |
168 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128) |
|
168 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128) | |
169 | >>> ascii_str(u'å') |
|
169 | >>> ascii_str(u'å') | |
170 | Traceback (most recent call last): |
|
170 | Traceback (most recent call last): | |
171 |
AssertionError: |
|
171 | AssertionError: 'å' | |
172 | """ |
|
172 | """ | |
173 | assert isinstance(s, bytes), repr(s) |
|
173 | assert isinstance(s, bytes), repr(s) | |
174 | # Note: we use "encode", even though we really *should* use "decode". But |
|
174 | return s.decode('ascii') | |
175 | # we are in py2 and don't want py2, and encode is doing what we need for the |
|
|||
176 | # ascii subset. |
|
|||
177 | return s.encode('ascii') |
|
|||
178 |
|
175 | |||
179 |
|
176 | |||
180 | # Regex taken from http://www.regular-expressions.info/email.html |
|
177 | # Regex taken from http://www.regular-expressions.info/email.html |
General Comments 0
You need to be logged in to leave comments.
Login now