Show More
@@ -70,21 +70,21 b' def safe_int(val, default=None):' | |||
|
70 | 70 | |
|
71 | 71 | def safe_unicode(s): |
|
72 | 72 | """ |
|
73 |
Safe unicode function. Use a few tricks to turn s into |
|
|
73 | Safe unicode str function. Use a few tricks to turn s into str: | |
|
74 | 74 | In case of UnicodeDecodeError with configured default encodings, try to |
|
75 | 75 | detect encoding with chardet library, then fall back to first encoding with |
|
76 | 76 | errors replaced. |
|
77 | 77 | """ |
|
78 |
if isinstance(s, |
|
|
78 | if isinstance(s, str): | |
|
79 | 79 | return s |
|
80 | 80 | |
|
81 |
if not isinstance(s, bytes): # use __str__ |
|
|
82 |
return |
|
|
81 | if not isinstance(s, bytes): # use __str__ and don't expect UnicodeDecodeError | |
|
82 | return str(s) | |
|
83 | 83 | |
|
84 | 84 | from kallithea.lib.vcs.conf import settings |
|
85 | 85 | for enc in settings.DEFAULT_ENCODINGS: |
|
86 | 86 | try: |
|
87 |
return |
|
|
87 | return str(s, enc) | |
|
88 | 88 | except UnicodeDecodeError: |
|
89 | 89 | pass |
|
90 | 90 | |
@@ -96,7 +96,7 b' def safe_unicode(s):' | |||
|
96 | 96 | except (ImportError, UnicodeDecodeError): |
|
97 | 97 | pass |
|
98 | 98 | |
|
99 |
return |
|
|
99 | return str(s, settings.DEFAULT_ENCODINGS[0], 'replace') | |
|
100 | 100 | |
|
101 | 101 | |
|
102 | 102 | def safe_bytes(s): |
@@ -108,7 +108,7 b' def safe_bytes(s):' | |||
|
108 | 108 | if isinstance(s, bytes): |
|
109 | 109 | return s |
|
110 | 110 | |
|
111 |
assert isinstance(s, |
|
|
111 | assert isinstance(s, str), repr(s) # bytes cannot coerce with __str__ or handle None or int | |
|
112 | 112 | |
|
113 | 113 | from kallithea.lib.vcs.conf import settings |
|
114 | 114 | for enc in settings.DEFAULT_ENCODINGS: |
@@ -120,12 +120,12 b' def safe_bytes(s):' | |||
|
120 | 120 | return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace') |
|
121 | 121 | |
|
122 | 122 | |
|
123 | safe_str = safe_bytes # safe_str is deprecated - it will be redefined when changing to py3 | |
|
123 | safe_str = safe_unicode | |
|
124 | 124 | |
|
125 | 125 | |
|
126 | 126 | def ascii_bytes(s): |
|
127 | 127 | """ |
|
128 |
Simple conversion from |
|
|
128 | Simple conversion from str to bytes, *assuming* all codepoints are | |
|
129 | 129 | 7-bit and it thus is pure ASCII. |
|
130 | 130 | Will fail badly with UnicodeError on invalid input. |
|
131 | 131 | This should be used where encoding and "safe" ambiguity should be avoided.
@@ -134,17 +134,17 b' def ascii_bytes(s):' | |||
|
134 | 134 | identifiers. |
|
135 | 135 | |
|
136 | 136 | >>> ascii_bytes('a') |
|
137 | 'a' | |
|
137 | b'a' | |
|
138 | 138 | >>> ascii_bytes(u'a') |
|
139 | 'a' | |
|
139 | b'a' | |
|
140 | 140 | >>> ascii_bytes('å') |
|
141 | 141 | Traceback (most recent call last): |
|
142 |
Unicode |
|
|
143 |
>>> ascii_bytes( |
|
|
142 | UnicodeEncodeError: 'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128) | |
|
143 | >>> ascii_bytes('å'.encode('utf8')) | |
|
144 | 144 | Traceback (most recent call last): |
|
145 | UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128) | |
|
145 | AssertionError: b'\xc3\xa5' | |
|
146 | 146 | """ |
|
147 |
assert isinstance(s, |
|
|
147 | assert isinstance(s, str), repr(s) | |
|
148 | 148 | return s.encode('ascii') |
|
149 | 149 | |
|
150 | 150 | |
@@ -158,23 +158,20 b' def ascii_str(s):' | |||
|
158 | 158 | where a unicode string is wanted without caring about encoding. For example |
|
159 | 159 | to hex, base64, urlencoding, or are known to be identifiers. |
|
160 | 160 | |
|
161 | >>> ascii_str('a') | |
|
161 | >>> ascii_str(b'a') | |
|
162 | 162 | 'a' |
|
163 | 163 | >>> ascii_str(u'a') |
|
164 | 164 | Traceback (most recent call last): |
|
165 |
AssertionError: |
|
|
166 | >>> ascii_str('å') | |
|
165 | AssertionError: 'a' | |
|
166 | >>> ascii_str('å'.encode('utf8')) | |
|
167 | 167 | Traceback (most recent call last): |
|
168 | 168 | UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128) |
|
169 | 169 | >>> ascii_str(u'å') |
|
170 | 170 | Traceback (most recent call last): |
|
171 |
AssertionError: |
|
|
171 | AssertionError: 'å' | |
|
172 | 172 | """ |
|
173 | 173 | assert isinstance(s, bytes), repr(s) |
|
174 | # Note: we use "encode", even though we really *should* use "decode". But | |
|
175 | # we are in py2 and don't want py2, and encode is doing what we need for the | |
|
176 | # ascii subset. | |
|
177 | return s.encode('ascii') | |
|
174 | return s.decode('ascii') | |
|
178 | 175 | |
|
179 | 176 | |
|
180 | 177 | # Regex taken from http://www.regular-expressions.info/email.html |
General Comments 0
You need to be logged in to leave comments.
Login now