##// END OF EJS Templates
Add functions for transcoding and manipulating multibyte strings
Matt Mackall -
r3770:f96c158e default
parent child Browse files
Show More
@@ -18,6 +18,58 b' demandload(globals(), "cStringIO errno g'
18 demandload(globals(), "os threading time calendar ConfigParser locale")
18 demandload(globals(), "os threading time calendar ConfigParser locale")
19
19
20 _encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
20 _encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
21 _encodingmode = os.environ.get("HGENCODINGMODE", "strict")
22
def tolocal(s):
    """
    Re-encode an internally stored string into the local encoding.

    Internal strings are expected to be UTF-8, but repositories written
    before locale support existed may hold Latin-1 or other character
    sets. Each candidate encoding is tried strictly in turn (UTF-8
    first, then Latin-1); if neither decodes cleanly, the string is
    decoded as UTF-8 with undecodable bytes replaced.

    Characters that the local encoding cannot represent are replaced
    rather than raising an error.
    """
    for candidate in ("utf-8", "latin1"):
        try:
            decoded = s.decode(candidate)  # strict: raises on bad input
            return decoded.encode(_encoding, "replace")
        except UnicodeDecodeError:
            continue
    # last resort: force a UTF-8 decode, substituting unknown bytes
    decoded = s.decode("utf-8", "replace")
    return decoded.encode(_encoding, "replace")
41
42 def fromlocal(s):
43 """
44 Convert a string from the local character encoding to UTF-8
45
46 We attempt to decode strings using the encoding mode set by
47 HG_ENCODINGMODE, which defaults to 'strict'. In this mode, unknown
48 characters will cause an error message. Other modes include
49 'replace', which replaces unknown characters with a special
50 Unicode character, and 'ignore', which drops the character.
51 """
52 try:
53 return s.decode(_encoding, _encodingmode).encode("utf-8")
54 except UnicodeDecodeError, inst:
55 sub = s[max(0, inst.start-10):inst.start+10]
56 raise Abort("decoding near '%s': %s!\n" % (sub, inst))
57
def locallen(s):
    """
    Return the length, in characters, of a locally encoded string.

    The string is decoded with the local encoding (undecodable bytes
    are replaced rather than raising), so the result counts decoded
    characters instead of raw bytes.
    """
    decoded = s.decode(_encoding, "replace")
    return len(decoded)
61
62 def localsub(s, a, b=None):
63 try:
64 u = s.decode(_encoding, _encodingmode)
65 if b is not None:
66 u = u[a:b]
67 else:
68 u = u[:a]
69 return u.encode(_encoding, _encodingmode)
70 except UnicodeDecodeError, inst:
71 sub = s[max(0, inst.start-10), inst.start+10]
72 raise Abort("decoding near '%s': %s!\n" % (sub, inst))
21
73
22 # used by parsedate
74 # used by parsedate
23 defaultdateformats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M',
75 defaultdateformats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M',
General Comments 0
You need to be logged in to leave comments. Login now