Show More
@@ -8,6 +8,28 b'' | |||
|
8 | 8 | import error |
|
9 | 9 | import unicodedata, locale, os |
|
10 | 10 | |
|
11 | # These unicode characters are ignored by HFS+ (Apple Technote 1150, | |
|
12 | # "Unicode Subtleties"), so we need to ignore them in some places for | |
|
13 | # sanity. | |
|
14 | _ignore = [unichr(int(x, 16)).encode("utf-8") for x in | |
|
15 | "200c 200d 200e 200f 202a 202b 202c 202d 202e " | |
|
16 | "206a 206b 206c 206d 206e 206f feff".split()] | |
|
17 | # verify the next function will work | |
|
18 | assert set([i[0] for i in _ignore]) == set(["\xe2", "\xef"]) | |
|
19 | ||
|
20 | def hfsignoreclean(s): | |
|
21 | """Remove codepoints ignored by HFS+ from s. | |
|
22 | ||
|
23 | >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) | |
|
24 | '.hg' | |
|
25 | >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8')) | |
|
26 | '.hg' | |
|
27 | """ | |
|
28 | if "\xe2" in s or "\xef" in s: | |
|
29 | for c in _ignore: | |
|
30 | s = s.replace(c, '') | |
|
31 | return s | |
|
32 | ||
|
11 | 33 | def _getpreferredencoding(): |
|
12 | 34 | ''' |
|
13 | 35 | On darwin, getpreferredencoding ignores the locale environment and |
General Comments 0
You need to be logged in to leave comments.
Login now