##// END OF EJS Templates
encoding: add hfsignoreclean to clean out HFS-ignored characters...
Augie Fackler -
r23596:885bd7c5 stable
parent child Browse files
Show More
@@ -8,6 +8,28 b''
8 8 import error
9 9 import unicodedata, locale, os
10 10
11 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
12 # "Unicode Subtleties"), so we need to ignore them in some places for
13 # sanity. Each entry of _ignore is one such codepoint encoded as UTF-8.
14 _ignore = [unichr(int(x, 16)).encode("utf-8") for x in
15 "200c 200d 200e 200f 202a 202b 202c 202d 202e "
16 "206a 206b 206c 206d 206e 206f feff".split()]
17 # hfsignoreclean() below only scans strings containing "\xe2" or "\xef"
18 assert set([i[0] for i in _ignore]) == set(["\xe2", "\xef"])
19
def hfsignoreclean(s):
    """Return s with every HFS+-ignored byte sequence stripped out.

    The sequences removed are the entries of the module-level _ignore
    list (UTF-8 encoded codepoints). Strings that contain none of them
    are returned unchanged.

    >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
    '.hg'
    >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
    '.hg'
    """
    # Fast path: every entry of _ignore begins with "\xe2" or "\xef"
    # (asserted at import time), so a string holding neither byte has
    # nothing to strip.
    if "\xe2" not in s and "\xef" not in s:
        return s
    for seq in _ignore:
        s = s.replace(seq, '')
    return s
32
11 33 def _getpreferredencoding():
12 34 '''
13 35 On darwin, getpreferredencoding ignores the locale environment and
General Comments 0
You need to be logged in to leave comments. Login now