##// END OF EJS Templates
move encoding bits from util to encoding...
Matt Mackall -
r7948:de377b1a default
parent child Browse files
Show More
@@ -0,0 +1,77 b''
1 """
2 encoding.py - character transcoding support for Mercurial
3
4 Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
5
6 This software may be used and distributed according to the terms of
7 the GNU General Public License version 2, incorporated herein by
8 reference.
9 """
10
11 import sys, unicodedata, locale, os, error
12
# Platform-specific names for plain ASCII, mapped to the codec name Python
# understands.
_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}

# Work out the local encoding: HGENCODING wins, then the locale.
try:
    encoding = os.environ.get("HGENCODING")
    if not encoding and sys.platform == 'darwin':
        # On darwin, getpreferredencoding ignores the locale environment and
        # always returns mac-roman. We override this if the environment is
        # not C (has been customized by the user).
        locale.setlocale(locale.LC_CTYPE, '')
        encoding = locale.getlocale()[1]
    if not encoding:
        encoding = locale.getpreferredencoding() or 'ascii'
        encoding = _encodingfixup.get(encoding, encoding)
except locale.Error:
    # the locale could not be queried or set; fall back to ASCII
    encoding = 'ascii'

# Error-handler name passed to decode() in fromlocal().
encodingmode = os.environ.get("HGENCODINGMODE", "strict")
# Second encoding tried by tolocal() when strict UTF-8 decoding fails.
fallbackencoding = 'ISO-8859-1'
30
31 def tolocal(s):
32 """
33 Convert a string from internal UTF-8 to local encoding
34
35 All internal strings should be UTF-8 but some repos before the
36 implementation of locale support may contain latin1 or possibly
37 other character sets. We attempt to decode everything strictly
38 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
39 replace unknown characters.
40 """
41 for e in ('UTF-8', fallbackencoding):
42 try:
43 u = s.decode(e) # attempt strict decoding
44 return u.encode(encoding, "replace")
45 except LookupError, k:
46 raise error.Abort("%s, please check your locale settings" % k)
47 except UnicodeDecodeError:
48 pass
49 u = s.decode("utf-8", "replace") # last ditch
50 return u.encode(encoding, "replace")
51
52 def fromlocal(s):
53 """
54 Convert a string from the local character encoding to UTF-8
55
56 We attempt to decode strings using the encoding mode set by
57 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
58 characters will cause an error message. Other modes include
59 'replace', which replaces unknown characters with a special
60 Unicode character, and 'ignore', which drops the character.
61 """
62 try:
63 return s.decode(encoding, encodingmode).encode("utf-8")
64 except UnicodeDecodeError, inst:
65 sub = s[max(0, inst.start-10):inst.start+10]
66 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
67 except LookupError, k:
68 raise error.Abort("%s, please check your locale settings" % k)
69
def colwidth(s):
    """
    Find the display column width of a string

    The string is decoded with the module-level 'encoding' (undecodable
    bytes are replaced). Characters whose East Asian width is 'W' or 'F'
    count as two columns, every other character as one. When unicodedata
    has no east_asian_width, the number of decoded characters is returned.
    """
    chars = s.decode(encoding, 'replace')
    if not hasattr(unicodedata, 'east_asian_width'):
        return len(chars)
    eaw = unicodedata.east_asian_width
    width = 0
    for ch in chars:
        if eaw(ch) in 'WF':
            width += 2
        else:
            width += 1
    return width
77
@@ -18,7 +18,7 b' from p4 import p4_source'
18 18 import filemap
19 19
20 20 import os, shutil
21 from mercurial import hg, util
21 from mercurial import hg, util, encoding
22 22 from mercurial.i18n import _
23 23
24 24 orig_encoding = 'ascii'
@@ -275,7 +275,7 b' class converter(object):'
275 275 if "\n" in desc:
276 276 desc = desc.splitlines()[0]
277 277 # convert log message to local encoding without using
278 # tolocal() because util._encoding conver() use it as
278 # tolocal() because convert() overrides encoding.encoding to
279 279 # 'utf-8'
280 280 self.ui.status("%d %s\n" % (num, recode(desc)))
281 281 self.ui.note(_("source: %s\n") % recode(c))
@@ -308,8 +308,8 b' class converter(object):'
308 308
309 309 def convert(ui, src, dest=None, revmapfile=None, **opts):
310 310 global orig_encoding
311 orig_encoding = util._encoding
312 util._encoding = 'UTF-8'
311 orig_encoding = encoding.encoding
312 encoding.encoding = 'UTF-8'
313 313
314 314 if not dest:
315 315 dest = hg.defaultdest(src) + "-hg"
@@ -6,7 +6,7 b''
6 6 from mercurial import demandimport
7 7 demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__',])
8 8
9 from mercurial import util
9 from mercurial import util, encoding
10 10 from mercurial.templatefilters import filters
11 11
12 12 from pygments import highlight
@@ -30,19 +30,19 b' def pygmentize(field, fctx, style, tmpl)'
30 30 return
31 31
32 32 # avoid UnicodeDecodeError in pygments
33 text = util.tolocal(text)
33 text = encoding.tolocal(text)
34 34
35 35 # To get multi-line strings right, we can't format line-by-line
36 36 try:
37 37 lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
38 encoding=util._encoding)
38 encoding=encoding.encoding)
39 39 except (ClassNotFound, ValueError):
40 40 try:
41 lexer = guess_lexer(text[:1024], encoding=util._encoding)
41 lexer = guess_lexer(text[:1024], encoding=encoding.encoding)
42 42 except (ClassNotFound, ValueError):
43 lexer = TextLexer(encoding=util._encoding)
43 lexer = TextLexer(encoding=encoding.encoding)
44 44
45 formatter = HtmlFormatter(style=style, encoding=util._encoding)
45 formatter = HtmlFormatter(style=style, encoding=encoding.encoding)
46 46
47 47 colorized = highlight(text, lexer, formatter)
48 48 # strip wrapping div
@@ -36,19 +36,19 b' To use this extension, enable the extens'
36 36 [extensions]
37 37 hgext.win32mbcs =
38 38
39 Path encoding conversion are done between unicode and util._encoding
39 Path encoding conversions are done between unicode and encoding.encoding
40 40 which is decided by mercurial from current locale setting or HGENCODING.
41 41
42 42 """
43 43
44 44 import os
45 45 from mercurial.i18n import _
46 from mercurial import util
46 from mercurial import util, encoding
47 47
48 48 def decode(arg):
49 49 if isinstance(arg, str):
50 uarg = arg.decode(util._encoding)
51 if arg == uarg.encode(util._encoding):
50 uarg = arg.decode(encoding.encoding)
51 if arg == uarg.encode(encoding.encoding):
52 52 return uarg
53 53 raise UnicodeError("Not local encoding")
54 54 elif isinstance(arg, tuple):
@@ -59,7 +59,7 b' def decode(arg):'
59 59
60 60 def encode(arg):
61 61 if isinstance(arg, unicode):
62 return arg.encode(util._encoding)
62 return arg.encode(encoding.encoding)
63 63 elif isinstance(arg, tuple):
64 64 return tuple(map(encode, arg))
65 65 elif isinstance(arg, list):
@@ -76,10 +76,10 b' def wrapper(func, args):'
76 76 # convert arguments to unicode, call func, then convert back
77 77 return encode(func(*decode(args)))
78 78 except UnicodeError:
79 # If not encoded with util._encoding, report it then
79 # If not encoded with encoding.encoding, report it then
80 80 # continue with calling original function.
81 81 raise util.Abort(_("[win32mbcs] filename conversion fail with"
82 " %s encoding\n") % (util._encoding))
82 " %s encoding\n") % (encoding.encoding))
83 83
84 84 def wrapname(name):
85 85 idx = name.rfind('.')
@@ -115,8 +115,9 b' def reposetup(ui, repo):'
115 115 return
116 116
117 117 # fake is only for relevant environment.
118 if util._encoding.lower() in problematic_encodings.split():
118 if encoding.encoding.lower() in problematic_encodings.split():
119 119 for f in funcs.split():
120 120 wrapname(f)
121 ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding)
121 ui.debug(_("[win32mbcs] activated with encoding: %s\n")
122 % encoding.encoding)
122 123
@@ -7,7 +7,7 b''
7 7
8 8 from node import bin, hex, nullid
9 9 from i18n import _
10 import util, error, revlog
10 import util, error, revlog, encoding
11 11
12 12 def _string_escape(text):
13 13 """
@@ -175,10 +175,10 b' class changelog(revlog.revlog):'
175 175 if not text:
176 176 return (nullid, "", (0, 0), [], "", {'branch': 'default'})
177 177 last = text.index("\n\n")
178 desc = util.tolocal(text[last + 2:])
178 desc = encoding.tolocal(text[last + 2:])
179 179 l = text[:last].split('\n')
180 180 manifest = bin(l[0])
181 user = util.tolocal(l[1])
181 user = encoding.tolocal(l[1])
182 182
183 183 extra_data = l[2].split(' ', 2)
184 184 if len(extra_data) != 3:
@@ -205,7 +205,7 b' class changelog(revlog.revlog):'
205 205 if "\n" in user:
206 206 raise error.RevlogError(_("username %s contains a newline")
207 207 % repr(user))
208 user, desc = util.fromlocal(user), util.fromlocal(desc)
208 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
209 209
210 210 if date:
211 211 parseddate = "%d %d" % util.parsedate(date)
@@ -7,7 +7,7 b''
7 7
8 8 from node import hex, nullid, nullrev, short
9 9 from i18n import _
10 import os, sys, bisect, stat
10 import os, sys, bisect, stat, encoding
11 11 import mdiff, bdiff, util, templater, templatefilters, patch, errno, error
12 12 import match as _match
13 13
@@ -626,7 +626,7 b' class changeset_printer(object):'
626 626
627 627 # don't show the default branch name
628 628 if branch != 'default':
629 branch = util.tolocal(branch)
629 branch = encoding.tolocal(branch)
630 630 self.ui.write(_("branch: %s\n") % branch)
631 631 for tag in self.repo.nodetags(changenode):
632 632 self.ui.write(_("tag: %s\n") % tag)
@@ -791,7 +791,7 b' class changeset_templater(changeset_prin'
791 791 def showbranches(**args):
792 792 branch = ctx.branch()
793 793 if branch != 'default':
794 branch = util.tolocal(branch)
794 branch = encoding.tolocal(branch)
795 795 return showlist('branch', [branch], plural='branches', **args)
796 796
797 797 def showparents(**args):
@@ -9,7 +9,7 b' from node import hex, nullid, nullrev, s'
9 9 from i18n import _, gettext
10 10 import os, re, sys
11 11 import hg, util, revlog, bundlerepo, extensions, copies, context, error
12 import difflib, patch, time, help, mdiff, tempfile, url
12 import difflib, patch, time, help, mdiff, tempfile, url, encoding
13 13 import archival, changegroup, cmdutil, hgweb.server, sshserver, hbisect
14 14 import merge as merge_
15 15
@@ -415,10 +415,10 b' def branch(ui, repo, label=None, **opts)'
415 415 if label not in [p.branch() for p in repo.parents()]:
416 416 raise util.Abort(_('a branch of the same name already exists'
417 417 ' (use --force to override)'))
418 repo.dirstate.setbranch(util.fromlocal(label))
418 repo.dirstate.setbranch(encoding.fromlocal(label))
419 419 ui.status(_('marked working directory as branch %s\n') % label)
420 420 else:
421 ui.write("%s\n" % util.tolocal(repo.dirstate.branch()))
421 ui.write("%s\n" % encoding.tolocal(repo.dirstate.branch()))
422 422
423 423 def branches(ui, repo, active=False):
424 424 """list repository named branches
@@ -431,7 +431,7 b' def branches(ui, repo, active=False):'
431 431 Use the command 'hg update' to switch to an existing branch.
432 432 """
433 433 hexfunc = ui.debugflag and hex or short
434 activebranches = [util.tolocal(repo[n].branch())
434 activebranches = [encoding.tolocal(repo[n].branch())
435 435 for n in repo.heads(closed=False)]
436 436 branches = util.sort([(tag in activebranches, repo.changelog.rev(node), tag)
437 437 for tag, node in repo.branchtags().items()])
@@ -449,7 +449,7 b' def branches(ui, repo, active=False):'
449 449 notice = ' (closed)'
450 450 else:
451 451 notice = ' (inactive)'
452 rev = str(node).rjust(31 - util.colwidth(tag))
452 rev = str(node).rjust(31 - encoding.colwidth(tag))
453 453 data = tag, rev, hexfunc(hn), notice
454 454 ui.write("%s %s:%s%s\n" % data)
455 455
@@ -882,9 +882,9 b' def debuginstall(ui):'
882 882 problems = 0
883 883
884 884 # encoding
885 ui.status(_("Checking encoding (%s)...\n") % util._encoding)
885 ui.status(_("Checking encoding (%s)...\n") % encoding.encoding)
886 886 try:
887 util.fromlocal("test")
887 encoding.fromlocal("test")
888 888 except util.Abort, inst:
889 889 ui.write(" %s\n" % inst)
890 890 ui.write(_(" (check that your locale is properly set)\n"))
@@ -1579,7 +1579,7 b' def identify(ui, repo, source=None,'
1579 1579 output.append(str(ctx.rev()))
1580 1580
1581 1581 if repo.local() and default and not ui.quiet:
1582 b = util.tolocal(ctx.branch())
1582 b = encoding.tolocal(ctx.branch())
1583 1583 if b != 'default':
1584 1584 output.append("(%s)" % b)
1585 1585
@@ -1589,7 +1589,7 b' def identify(ui, repo, source=None,'
1589 1589 output.append(t)
1590 1590
1591 1591 if branch:
1592 output.append(util.tolocal(ctx.branch()))
1592 output.append(encoding.tolocal(ctx.branch()))
1593 1593
1594 1594 if tags:
1595 1595 output.extend(ctx.tags())
@@ -2855,7 +2855,7 b' def tags(ui, repo):'
2855 2855 except error.LookupError:
2856 2856 r = " ?:%s" % hn
2857 2857 else:
2858 spaces = " " * (30 - util.colwidth(t))
2858 spaces = " " * (30 - encoding.colwidth(t))
2859 2859 if ui.verbose:
2860 2860 if repo.tagtype(t) == 'local':
2861 2861 tagtype = " local"
@@ -2976,8 +2976,9 b' globalopts = ['
2976 2976 ('', 'config', [], _('set/override config option')),
2977 2977 ('', 'debug', None, _('enable debugging output')),
2978 2978 ('', 'debugger', None, _('start debugger')),
2979 ('', 'encoding', util._encoding, _('set the charset encoding')),
2980 ('', 'encodingmode', util._encodingmode, _('set the charset encoding mode')),
2979 ('', 'encoding', encoding.encoding, _('set the charset encoding')),
2980 ('', 'encodingmode', encoding.encodingmode,
2981 _('set the charset encoding mode')),
2981 2982 ('', 'lsprof', None, _('print improved command execution profile')),
2982 2983 ('', 'traceback', None, _('print traceback on exception')),
2983 2984 ('', 'time', None, _('time how long the command takes')),
@@ -8,7 +8,7 b''
8 8 from i18n import _
9 9 import os, sys, atexit, signal, pdb, socket, errno, shlex, time
10 10 import util, commands, hg, fancyopts, extensions, hook, error
11 import cmdutil
11 import cmdutil, encoding
12 12 import ui as _ui
13 13
14 14 def run():
@@ -304,7 +304,7 b' def _dispatch(ui, args):'
304 304 # check for fallback encoding
305 305 fallback = lui.config('ui', 'fallbackencoding')
306 306 if fallback:
307 util._fallbackencoding = fallback
307 encoding.fallbackencoding = fallback
308 308
309 309 fullargs = args
310 310 cmd, func, args, options, cmdoptions = _parse(lui, args)
@@ -319,9 +319,9 b' def _dispatch(ui, args):'
319 319 "and --repository may only be abbreviated as --repo!"))
320 320
321 321 if options["encoding"]:
322 util._encoding = options["encoding"]
322 encoding.encoding = options["encoding"]
323 323 if options["encodingmode"]:
324 util._encodingmode = options["encodingmode"]
324 encoding.encodingmode = options["encodingmode"]
325 325 if options["time"]:
326 326 def get_times():
327 327 t = os.times()
@@ -7,7 +7,7 b''
7 7 # of the GNU General Public License, incorporated herein by reference.
8 8
9 9 import os
10 from mercurial import ui, hg, util, hook, error
10 from mercurial import ui, hg, util, hook, error, encoding
11 11 from mercurial import templater, templatefilters
12 12 from common import get_mtime, style_map, ErrorResponse
13 13 from common import HTTP_OK, HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
@@ -65,7 +65,7 b' class hgweb(object):'
65 65 self.maxshortchanges = int(self.config("web", "maxshortchanges", 60))
66 66 self.maxfiles = int(self.config("web", "maxfiles", 10))
67 67 self.allowpull = self.configbool("web", "allowpull", True)
68 self.encoding = self.config("web", "encoding", util._encoding)
68 self.encoding = self.config("web", "encoding", encoding.encoding)
69 69
70 70 def run(self):
71 71 if not os.environ.get('GATEWAY_INTERFACE', '').startswith("CGI/1."):
@@ -8,7 +8,7 b''
8 8
9 9 import os
10 10 from mercurial.i18n import _
11 from mercurial import ui, hg, util, templater, templatefilters, error
11 from mercurial import ui, hg, util, templater, templatefilters, error, encoding
12 12 from common import ErrorResponse, get_mtime, staticfile, style_map, paritygen,\
13 13 get_contact, HTTP_OK, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
14 14 from hgweb_mod import hgweb
@@ -119,7 +119,7 b' class hgwebdir(object):'
119 119
120 120 virtual = req.env.get("PATH_INFO", "").strip('/')
121 121 tmpl = self.templater(req)
122 ctype = tmpl('mimetype', encoding=util._encoding)
122 ctype = tmpl('mimetype', encoding=encoding.encoding)
123 123 ctype = templater.stringify(ctype)
124 124
125 125 # a static file
@@ -285,7 +285,7 b' class hgwebdir(object):'
285 285 def templater(self, req):
286 286
287 287 def header(**map):
288 yield tmpl('header', encoding=util._encoding, **map)
288 yield tmpl('header', encoding=encoding.encoding, **map)
289 289
290 290 def footer(**map):
291 291 yield tmpl("footer", **map)
@@ -7,7 +7,7 b' This software may be used and distribute'
7 7 of the GNU General Public License, incorporated herein by reference.
8 8 """
9 9
10 import gettext, sys, os
10 import gettext, sys, os, encoding
11 11
12 12 # modelled after templater.templatepath:
13 13 if hasattr(sys, 'frozen'):
@@ -37,15 +37,13 b' def gettext(message):'
37 37 if message is None:
38 38 return message
39 39
40 # We cannot just run the text through util.tolocal since that
41 # leads to infinite recursion when util._encoding is invalid.
40 # We cannot just run the text through encoding.tolocal since that
41 # leads to infinite recursion when encoding.encoding is invalid.
42 42 try:
43 43 u = t.ugettext(message)
44 return u.encode(util._encoding, "replace")
44 return u.encode(encoding.encoding, "replace")
45 45 except LookupError:
46 46 return message
47 47
48 48 _ = gettext
49 49
50 # Moved after _ because of circular import.
51 import util
@@ -9,7 +9,7 b' from node import bin, hex, nullid, nullr'
9 9 from i18n import _
10 10 import repo, changegroup
11 11 import changelog, dirstate, filelog, manifest, context, weakref
12 import lock, transaction, stat, errno, ui, store
12 import lock, transaction, stat, errno, ui, store, encoding
13 13 import os, time, util, extensions, hook, inspect, error
14 14 import match as match_
15 15 import merge as merge_
@@ -188,7 +188,7 b' class localrepository(repo.repository):'
188 188 fp.write(prevtags)
189 189
190 190 # committed tags are stored in UTF-8
191 writetags(fp, names, util.fromlocal, prevtags)
191 writetags(fp, names, encoding.fromlocal, prevtags)
192 192
193 193 if use_dirstate and '.hgtags' not in self.dirstate:
194 194 self.add(['.hgtags'])
@@ -254,7 +254,7 b' class localrepository(repo.repository):'
254 254 warn(_("cannot parse entry"))
255 255 continue
256 256 node, key = s
257 key = util.tolocal(key.strip()) # stored in UTF-8
257 key = encoding.tolocal(key.strip()) # stored in UTF-8
258 258 try:
259 259 bin_n = bin(node)
260 260 except TypeError:
@@ -297,7 +297,7 b' class localrepository(repo.repository):'
297 297 readtags(f.data().splitlines(), f, "global")
298 298
299 299 try:
300 data = util.fromlocal(self.opener("localtags").read())
300 data = encoding.fromlocal(self.opener("localtags").read())
301 301 # localtags are stored in the local character set
302 302 # while the internal tag table is stored in UTF-8
303 303 readtags(data.splitlines(), "localtags", "local")
@@ -397,7 +397,7 b' class localrepository(repo.repository):'
397 397 # the branch cache is stored on disk as UTF-8, but in the local
398 398 # charset internally
399 399 for k, v in partial.iteritems():
400 self.branchcache[util.tolocal(k)] = v
400 self.branchcache[encoding.tolocal(k)] = v
401 401 return self.branchcache
402 402
403 403
@@ -647,7 +647,7 b' class localrepository(repo.repository):'
647 647 except IOError:
648 648 self.ui.warn(_("Named branch could not be reset, "
649 649 "current branch still is: %s\n")
650 % util.tolocal(self.dirstate.branch()))
650 % encoding.tolocal(self.dirstate.branch()))
651 651 self.invalidate()
652 652 self.dirstate.invalidate()
653 653 else:
@@ -943,7 +943,8 b' class localrepository(repo.repository):'
943 943 if p2 != nullid:
944 944 edittext.append("HG: branch merge")
945 945 if branchname:
946 edittext.append("HG: branch '%s'" % util.tolocal(branchname))
946 edittext.append("HG: branch '%s'"
947 % encoding.tolocal(branchname))
947 948 edittext.extend(["HG: added %s" % f for f in added])
948 949 edittext.extend(["HG: changed %s" % f for f in updated])
949 950 edittext.extend(["HG: removed %s" % f for f in removed])
@@ -8,7 +8,7 b''
8 8 from i18n import _
9 9 import os, smtplib, socket
10 10 import email.Header, email.MIMEText, email.Utils
11 import util
11 import util, encoding
12 12
13 13 def _smtp(ui):
14 14 '''build an smtp connection and return a function to send mail'''
@@ -100,8 +100,8 b" def mimetextpatch(s, subtype='plain', di"
100 100 def _charsets(ui):
101 101 '''Obtains charsets to send mail parts not containing patches.'''
102 102 charsets = [cs.lower() for cs in ui.configlist('email', 'charsets')]
103 fallbacks = [util._fallbackencoding.lower(),
104 util._encoding.lower(), 'utf-8']
103 fallbacks = [encoding.fallbackencoding.lower(),
104 encoding.encoding.lower(), 'utf-8']
105 105 for cs in fallbacks: # util.unique does not keep order
106 106 if cs not in charsets:
107 107 charsets.append(cs)
@@ -110,14 +110,14 b' def _charsets(ui):'
110 110 def _encode(ui, s, charsets):
111 111 '''Returns (converted) string, charset tuple.
112 112 Finds out best charset by cycling through sendcharsets in descending
113 order. Tries both _encoding and _fallbackencoding for input. Only as
113 order. Tries both encoding and fallbackencoding for input. Only as
114 114 last resort send as is in fake ascii.
115 115 Caveat: Do not use for mail parts containing patches!'''
116 116 try:
117 117 s.decode('ascii')
118 118 except UnicodeDecodeError:
119 119 sendcharsets = charsets or _charsets(ui)
120 for ics in (util._encoding, util._fallbackencoding):
120 for ics in (encoding.encoding, encoding.fallbackencoding):
121 121 try:
122 122 u = s.decode(ics)
123 123 except UnicodeDecodeError:
@@ -6,7 +6,7 b''
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 import cgi, re, os, time, urllib, textwrap
9 import util, templater
9 import util, templater, encoding
10 10
11 11 agescales = [("second", 1),
12 12 ("minute", 60),
@@ -76,7 +76,7 b' def nl2br(text):'
76 76 return text.replace('\n', '<br/>\n')
77 77
78 78 def obfuscate(text):
79 text = unicode(text, util._encoding, 'replace')
79 text = unicode(text, encoding.encoding, 'replace')
80 80 return ''.join(['&#%d;' % ord(c) for c in text])
81 81
82 82 def domain(author):
@@ -14,8 +14,8 b' platform-specific details from the core.'
14 14
15 15 from i18n import _
16 16 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error
17 import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil
18 import imp, unicodedata
17 import os, stat, threading, time, calendar, ConfigParser, glob, osutil
18 import imp
19 19
20 20 # Python compatibility
21 21
@@ -81,71 +81,6 b' except ImportError:'
81 81 popen3 = os.popen3
82 82
83 83
84 _encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
85
86 try:
87 _encoding = os.environ.get("HGENCODING")
88 if sys.platform == 'darwin' and not _encoding:
89 # On darwin, getpreferredencoding ignores the locale environment and
90 # always returns mac-roman. We override this if the environment is
91 # not C (has been customized by the user).
92 locale.setlocale(locale.LC_CTYPE, '')
93 _encoding = locale.getlocale()[1]
94 if not _encoding:
95 _encoding = locale.getpreferredencoding() or 'ascii'
96 _encoding = _encodingfixup.get(_encoding, _encoding)
97 except locale.Error:
98 _encoding = 'ascii'
99 _encodingmode = os.environ.get("HGENCODINGMODE", "strict")
100 _fallbackencoding = 'ISO-8859-1'
101
102 def tolocal(s):
103 """
104 Convert a string from internal UTF-8 to local encoding
105
106 All internal strings should be UTF-8 but some repos before the
107 implementation of locale support may contain latin1 or possibly
108 other character sets. We attempt to decode everything strictly
109 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
110 replace unknown characters.
111 """
112 for e in ('UTF-8', _fallbackencoding):
113 try:
114 u = s.decode(e) # attempt strict decoding
115 return u.encode(_encoding, "replace")
116 except LookupError, k:
117 raise Abort(_("%s, please check your locale settings") % k)
118 except UnicodeDecodeError:
119 pass
120 u = s.decode("utf-8", "replace") # last ditch
121 return u.encode(_encoding, "replace")
122
123 def fromlocal(s):
124 """
125 Convert a string from the local character encoding to UTF-8
126
127 We attempt to decode strings using the encoding mode set by
128 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
129 characters will cause an error message. Other modes include
130 'replace', which replaces unknown characters with a special
131 Unicode character, and 'ignore', which drops the character.
132 """
133 try:
134 return s.decode(_encoding, _encodingmode).encode("utf-8")
135 except UnicodeDecodeError, inst:
136 sub = s[max(0, inst.start-10):inst.start+10]
137 raise Abort("decoding near '%s': %s!" % (sub, inst))
138 except LookupError, k:
139 raise Abort(_("%s, please check your locale settings") % k)
140
141 def colwidth(s):
142 "Find the column width of a UTF-8 string for display"
143 d = s.decode(_encoding, 'replace')
144 if hasattr(unicodedata, 'east_asian_width'):
145 w = unicodedata.east_asian_width
146 return sum([w(c) in 'WF' and 2 or 1 for c in d])
147 return len(d)
148
149 84 def version():
150 85 """Return version information if available."""
151 86 try:
@@ -16,7 +16,7 b' import win32api'
16 16
17 17 import errno, os, sys, pywintypes, win32con, win32file, win32process
18 18 import cStringIO, winerror
19 import osutil
19 import osutil, encoding
20 20 import util
21 21 from win32com.shell import shell,shellcon
22 22
@@ -213,7 +213,7 b' def lookup_reg(key, valname=None, scope='
213 213 try:
214 214 val = QueryValueEx(OpenKey(s, key), valname)[0]
215 215 # never let a Unicode string escape into the wild
216 return util.tolocal(val.encode('UTF-8'))
216 return encoding.tolocal(val.encode('UTF-8'))
217 217 except EnvironmentError:
218 218 pass
219 219
General Comments 0
You need to be logged in to leave comments. Login now