Show More
@@ -18,6 +18,58 b' demandload(globals(), "cStringIO errno g' | |||
|
18 | 18 | demandload(globals(), "os threading time calendar ConfigParser locale") |
|
19 | 19 | |
|
20 | 20 | _encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding() |
|
21 | _encodingmode = os.environ.get("HGENCODINGMODE", "strict") | |
|
22 | ||
|
def tolocal(s):
    """
    Convert a string from internal UTF-8 to the local encoding

    Internal strings are expected to be UTF-8, but older repositories
    may hold latin1 (or other) data. Each candidate codec is tried
    with strict decoding, UTF-8 first and latin1 second. If neither
    succeeds, the string is decoded as UTF-8 with undecodable input
    replaced. In every case the result is encoded to _encoding with
    unencodable characters replaced.
    """
    for codec in ("utf-8", "latin1"):
        try:
            # strict decode; the first codec that accepts the input wins
            return s.decode(codec).encode(_encoding, "replace")
        except UnicodeDecodeError:
            continue
    # last ditch: lossy UTF-8 decode
    return s.decode("utf-8", "replace").encode(_encoding, "replace")
|
41 | ||
|
def fromlocal(s):
    """
    Convert a string from the local character encoding to UTF-8

    The string is decoded from _encoding using the error mode held in
    _encodingmode, then re-encoded as UTF-8. The mode is taken from
    the HGENCODINGMODE environment variable and defaults to 'strict'.
    In this mode, unknown characters will cause an error message.
    Other modes include 'replace', which replaces unknown characters
    with a special Unicode character, and 'ignore', which drops the
    character.

    Raises Abort when decoding fails; the message quotes up to ten
    positions of the input on either side of the failing offset.
    """
    try:
        return s.decode(_encoding, _encodingmode).encode("utf-8")
    except UnicodeDecodeError as inst:
        # quote a short window of the raw input around the bad offset
        sub = s[max(0, inst.start-10):inst.start+10]
        raise Abort("decoding near '%s': %s!\n" % (sub, inst))
|
57 | ||
|
def locallen(s):
    """
    Find the length in characters of a local string

    The string is decoded from _encoding with undecodable input
    replaced, and the length of the decoded text is returned.
    """
    decoded = s.decode(_encoding, "replace")
    return len(decoded)
|
61 | ||
|
def localsub(s, a, b=None):
    """
    Slice a local string by character position rather than by byte

    The string is decoded from _encoding using the error mode in
    _encodingmode, sliced, and encoded back the same way. When b is
    given the result is the decoded text [a:b]; when b is omitted the
    result is the decoded text [:a].

    Raises Abort when decoding fails; the message quotes up to ten
    positions of the input on either side of the failing offset.
    """
    try:
        u = s.decode(_encoding, _encodingmode)
        if b is not None:
            u = u[a:b]
        else:
            u = u[:a]
        return u.encode(_encoding, _encodingmode)
    except UnicodeDecodeError as inst:
        # This must be a slice: indexing the string with a tuple raises
        # TypeError, which would hide the Abort we mean to report.
        sub = s[max(0, inst.start-10):inst.start+10]
        raise Abort("decoding near '%s': %s!\n" % (sub, inst))
|
21 | 73 | |
|
22 | 74 | # used by parsedate |
|
23 | 75 | defaultdateformats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M', |
General Comments 0
You need to be logged in to leave comments.
Login now