##// END OF EJS Templates
py3: update safe_* functions for how unicode pretty much has been renamed to str...
Mads Kiilerich -
r8077:b1a3e6df default
parent child Browse files
Show More
@@ -70,21 +70,21 b' def safe_int(val, default=None):'
70
70
71 def safe_unicode(s):
71 def safe_unicode(s):
72 """
72 """
73 Safe unicode function. Use a few tricks to turn s into unicode string:
73 Safe unicode str function. Use a few tricks to turn s into str:
74 In case of UnicodeDecodeError with configured default encodings, try to
74 In case of UnicodeDecodeError with configured default encodings, try to
75 detect encoding with chardet library, then fall back to first encoding with
75 detect encoding with chardet library, then fall back to first encoding with
76 errors replaced.
76 errors replaced.
77 """
77 """
78 if isinstance(s, unicode):
78 if isinstance(s, str):
79 return s
79 return s
80
80
81 if not isinstance(s, bytes): # use __str__ / __unicode__ and don't expect UnicodeDecodeError
81 if not isinstance(s, bytes): # use __str__ and don't expect UnicodeDecodeError
82 return unicode(s)
82 return str(s)
83
83
84 from kallithea.lib.vcs.conf import settings
84 from kallithea.lib.vcs.conf import settings
85 for enc in settings.DEFAULT_ENCODINGS:
85 for enc in settings.DEFAULT_ENCODINGS:
86 try:
86 try:
87 return unicode(s, enc)
87 return str(s, enc)
88 except UnicodeDecodeError:
88 except UnicodeDecodeError:
89 pass
89 pass
90
90
@@ -96,7 +96,7 b' def safe_unicode(s):'
96 except (ImportError, UnicodeDecodeError):
96 except (ImportError, UnicodeDecodeError):
97 pass
97 pass
98
98
99 return unicode(s, settings.DEFAULT_ENCODINGS[0], 'replace')
99 return str(s, settings.DEFAULT_ENCODINGS[0], 'replace')
100
100
101
101
102 def safe_bytes(s):
102 def safe_bytes(s):
@@ -108,7 +108,7 b' def safe_bytes(s):'
108 if isinstance(s, bytes):
108 if isinstance(s, bytes):
109 return s
109 return s
110
110
111 assert isinstance(s, unicode), repr(s) # bytes cannot coerce with __str__ or handle None or int
111 assert isinstance(s, str), repr(s) # bytes cannot coerce with __str__ or handle None or int
112
112
113 from kallithea.lib.vcs.conf import settings
113 from kallithea.lib.vcs.conf import settings
114 for enc in settings.DEFAULT_ENCODINGS:
114 for enc in settings.DEFAULT_ENCODINGS:
@@ -120,12 +120,12 b' def safe_bytes(s):'
120 return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
120 return s.encode(settings.DEFAULT_ENCODINGS[0], 'replace')
121
121
122
122
123 safe_str = safe_bytes # safe_str is deprecated - it will be redefined when changing to py3
123 safe_str = safe_unicode
124
124
125
125
126 def ascii_bytes(s):
126 def ascii_bytes(s):
127 """
127 """
128 Simple conversion from unicode/str to bytes, *assuming* all codepoints are
128 Simple conversion from str to bytes, *assuming* all codepoints are
129 7-bit and it thus is pure ASCII.
129 7-bit and it thus is pure ASCII.
130 Will fail badly with UnicodeError on invalid input.
130 Will fail badly with UnicodeError on invalid input.
131 This should be used where encoding and "safe" ambiguity should be avoided.
131 This should be used where encoding and "safe" ambiguity should be avoided.
@@ -134,17 +134,17 b' def ascii_bytes(s):'
134 identifiers.
134 identifiers.
135
135
136 >>> ascii_bytes('a')
136 >>> ascii_bytes('a')
137 'a'
137 b'a'
138 >>> ascii_bytes(u'a')
138 >>> ascii_bytes(u'a')
139 'a'
139 b'a'
140 >>> ascii_bytes('å')
140 >>> ascii_bytes('å')
141 Traceback (most recent call last):
141 Traceback (most recent call last):
142 UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
142 UnicodeEncodeError: 'ascii' codec can't encode character '\xe5' in position 0: ordinal not in range(128)
143 >>> ascii_bytes(u'å')
143 >>> ascii_bytes('å'.encode('utf8'))
144 Traceback (most recent call last):
144 Traceback (most recent call last):
145 UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-1: ordinal not in range(128)
145 AssertionError: b'\xc3\xa5'
146 """
146 """
147 assert isinstance(s, (unicode, str)), repr(s)
147 assert isinstance(s, str), repr(s)
148 return s.encode('ascii')
148 return s.encode('ascii')
149
149
150
150
@@ -158,23 +158,20 b' def ascii_str(s):'
158 where a unicode string is wanted without caring about encoding. For example
158 where a unicode string is wanted without caring about encoding. For example
159 to hex, base64, urlencoding, or are known to be identifiers.
159 to hex, base64, urlencoding, or are known to be identifiers.
160
160
161 >>> ascii_str('a')
161 >>> ascii_str(b'a')
162 'a'
162 'a'
163 >>> ascii_str(u'a')
163 >>> ascii_str(u'a')
164 Traceback (most recent call last):
164 Traceback (most recent call last):
165 AssertionError: u'a'
165 AssertionError: 'a'
166 >>> ascii_str('å')
166 >>> ascii_str('å'.encode('utf8'))
167 Traceback (most recent call last):
167 Traceback (most recent call last):
168 UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
168 UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
169 >>> ascii_str(u'å')
169 >>> ascii_str(u'å')
170 Traceback (most recent call last):
170 Traceback (most recent call last):
171 AssertionError: u'\xc3\xa5'
171 AssertionError: 'å'
172 """
172 """
173 assert isinstance(s, bytes), repr(s)
173 assert isinstance(s, bytes), repr(s)
174 # Note: we use "encode", even though we really *should* use "decode". But
174 return s.decode('ascii')
175 # we are in py2 and don't want py2, and encode is doing what we need for the
176 # ascii subset.
177 return s.encode('ascii')
178
175
179
176
180 # Regex taken from http://www.regular-expressions.info/email.html
177 # Regex taken from http://www.regular-expressions.info/email.html
General Comments 0
You need to be logged in to leave comments. Login now