Show More
@@ -8,6 +8,28 b'' | |||||
8 | import error |
|
8 | import error | |
9 | import unicodedata, locale, os |
|
9 | import unicodedata, locale, os | |
10 |
|
10 | |||
|
# These unicode characters are ignored by HFS+ (Apple Technote 1150,
# "Unicode Subtleties"), so we need to ignore them in some places for
# sanity.
try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3: chr() already yields a unicode character

# UTF-8 encoded byte sequences of every ignored codepoint.
_ignore = [_unichr(int(x, 16)).encode("utf-8") for x in
           "200c 200d 200e 200f 202a 202b 202c 202d 202e "
           "206a 206b 206c 206d 206e 206f feff".split()]
# verify the next function will work: its fast path only looks for these
# two lead bytes.  Slice (i[:1]) rather than index (i[0]) so the check
# compares byte strings on both Python 2 and Python 3.
assert set([i[:1] for i in _ignore]) == set([b"\xe2", b"\xef"])

def hfsignoreclean(s):
    """Remove codepoints ignored by HFS+ from s.

    s is a UTF-8 encoded byte string; a byte string is returned.  When s
    contains none of the lead bytes used by the ignored codepoints it is
    returned unchanged without scanning for each sequence.

    >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) == b'.hg'
    True
    >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8')) == b'.hg'
    True
    """
    # Cheap pre-check: every entry of _ignore starts with \xe2 or \xef
    # (asserted above), so most names skip the replace loop entirely.
    if b"\xe2" in s or b"\xef" in s:
        for c in _ignore:
            s = s.replace(c, b'')
    return s
|
32 | ||||
11 | def _getpreferredencoding(): |
|
33 | def _getpreferredencoding(): | |
12 | ''' |
|
34 | ''' | |
13 | On darwin, getpreferredencoding ignores the locale environment and |
|
35 | On darwin, getpreferredencoding ignores the locale environment and |
General Comments 0
You need to be logged in to leave comments.
Login now