##// END OF EJS Templates
merge with stable
Matt Mackall -
r19135:1c2dd751 merge default
parent child Browse files
Show More
@@ -194,6 +194,22 b" if sys.platform == 'darwin':"
194 import fcntl # only needed on darwin, missing on jython
194 import fcntl # only needed on darwin, missing on jython
195
195
196 def normcase(path):
196 def normcase(path):
197 '''
198 Normalize a filename for OS X-compatible comparison:
199 - escape-encode invalid characters
200 - decompose to NFD
201 - lowercase
202
203 >>> normcase('UPPER')
204 'upper'
205 >>> normcase('Caf\xc3\xa9')
206 'cafe\\xcc\\x81'
207 >>> normcase('\xc3\x89')
208 'e\\xcc\\x81'
209 >>> normcase('\xb8\xca\xc3\xca\xbe\xc8.JPG') # issue3918
210 '%b8%ca%c3\\xca\\xbe%c8.jpg'
211 '''
212
197 try:
213 try:
198 path.decode('ascii') # throw exception for non-ASCII character
214 path.decode('ascii') # throw exception for non-ASCII character
199 return path.lower()
215 return path.lower()
@@ -202,16 +218,42 b" if sys.platform == 'darwin':"
202 try:
218 try:
203 u = path.decode('utf-8')
219 u = path.decode('utf-8')
204 except UnicodeDecodeError:
220 except UnicodeDecodeError:
205 # percent-encode any characters that don't round-trip
221 # OS X percent-encodes any bytes that aren't valid utf-8
206 p2 = path.decode('utf-8', 'ignore').encode('utf-8')
222 s = ''
207 s = ""
223 g = ''
208 pos = 0
224 l = 0
209 for c in path:
225 for c in path:
210 if p2[pos:pos + 1] == c:
226 o = ord(c)
227 if l and o < 128 or o >= 192:
228 # we want a continuation byte, but didn't get one
229 s += ''.join(["%%%02X" % ord(x) for x in g])
230 g = ''
231 l = 0
232 if l == 0 and o < 128:
233 # ascii
211 s += c
234 s += c
212 pos += 1
235 elif l == 0 and 194 <= o < 245:
236 # valid leading bytes
237 if o < 224:
238 l = 1
239 elif o < 240:
240 l = 2
241 else:
242 l = 3
243 g = c
244 elif l > 0 and 128 <= o < 192:
245 # valid continuations
246 g += c
247 l -= 1
248 if not l:
249 s += g
250 g = ''
213 else:
251 else:
214 s += "%%%02X" % ord(c)
252 # invalid
253 s += "%%%02X" % o
254
255 # any remaining partial characters
256 s += ''.join(["%%%02X" % ord(x) for x in g])
215 u = s.decode('utf-8')
257 u = s.decode('utf-8')
216
258
217 # Decompose then lowercase (HFS+ technote specifies lower)
259 # Decompose then lowercase (HFS+ technote specifies lower)
General Comments 0
You need to be logged in to leave comments. Log in now